#install.packages('TDAmapper')
library(TDAmapper)
library(cluster)
library(vip)
##
## Attaching package: 'vip'
## The following object is masked from 'package:utils':
##
## vi
#install.packages('kernlab')
library(kernlab)
#install.packages('class')
library(class)
#install.packages('nnet')
library(nnet)
#install.packages('randomForest')
library(randomForest)
## randomForest 4.7-1.1
## Type rfNews() to see new features/changes/bug fixes.
#install.packages('e1071')
library(e1071)
#install.packages("BayesFactor")
library(BayesFactor)
## Loading required package: coda
##
## Attaching package: 'coda'
## The following object is masked from 'package:kernlab':
##
## nvar
## Loading required package: Matrix
## ************
## Welcome to BayesFactor 0.9.12-4.5. If you have questions, please contact Richard Morey (richarddmorey@gmail.com).
##
## Type BFManual() to open the manual.
## ************
library(BayesPPD)
library(bayestestR)
#install.packages('igraph')
library('igraph')
## Warning: package 'igraph' was built under R version 4.3.3
##
## Attaching package: 'igraph'
## The following object is masked from 'package:BayesFactor':
##
## compare
## The following object is masked from 'package:class':
##
## knn
## The following objects are masked from 'package:stats':
##
## decompose, spectrum
## The following object is masked from 'package:base':
##
## union
#install.packages('locfit')
library(locfit)
## locfit 1.5-9.8 2023-06-11
#install.packages('ggplot2')
library(ggplot2)
##
## Attaching package: 'ggplot2'
## The following object is masked from 'package:randomForest':
##
## margin
## The following object is masked from 'package:kernlab':
##
## alpha
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:igraph':
##
## as_data_frame, groups, union
## The following object is masked from 'package:randomForest':
##
## combine
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
#install.packages('networkD3')
library(networkD3)
library(rstanarm)
## Loading required package: Rcpp
## This is rstanarm version 2.26.1
## - See https://mc-stan.org/rstanarm/articles/priors for changes to default priors!
## - Default priors may change, so it's safest to specify priors, even if equivalent to the defaults.
## - For execution on a local, multicore CPU with excess RAM we recommend calling
## options(mc.cores = parallel::detectCores())
library(see)
#install.packages('tidyverse')
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ forcats 1.0.0 ✔ stringr 1.5.1
## ✔ lubridate 1.9.3 ✔ tibble 3.2.1
## ✔ purrr 1.0.2 ✔ tidyr 1.3.0
## ✔ readr 2.1.4
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ lubridate::%--%() masks igraph::%--%()
## ✖ ggplot2::alpha() masks kernlab::alpha()
## ✖ tibble::as_data_frame() masks dplyr::as_data_frame(), igraph::as_data_frame()
## ✖ dplyr::combine() masks randomForest::combine()
## ✖ purrr::compose() masks igraph::compose()
## ✖ purrr::cross() masks kernlab::cross()
## ✖ tidyr::crossing() masks igraph::crossing()
## ✖ tidyr::expand() masks Matrix::expand()
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ✖ ggplot2::margin() masks randomForest::margin()
## ✖ purrr::none() masks locfit::none()
## ✖ tidyr::pack() masks Matrix::pack()
## ✖ purrr::simplify() masks igraph::simplify()
## ✖ tidyr::unpack() masks Matrix::unpack()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
#install.packages('caret')
library(caret)
## Loading required package: lattice
##
## Attaching package: 'caret'
##
## The following object is masked from 'package:purrr':
##
## lift
##
## The following objects are masked from 'package:rstanarm':
##
## compare_models, R2
#install.packages('ISLR')
library(ISLR)
#install.packages('MCMCpack')
library(MCMCpack)
## Loading required package: MASS
##
## Attaching package: 'MASS'
##
## The following object is masked from 'package:dplyr':
##
## select
##
## ##
## ## Markov Chain Monte Carlo Package (MCMCpack)
## ## Copyright (C) 2003-2025 Andrew D. Martin, Kevin M. Quinn, and Jong Hee Park
## ##
## ## Support provided by the U.S. National Science Foundation
## ## (Grants SES-0350646 and SES-0350613)
## ##
#install.packages("caret")
library(caret)
library(TDA)
##
## Attaching package: 'TDA'
##
## The following object is masked from 'package:cluster':
##
## silhouette
library(TDAstats)
library(ks)
##
## Attaching package: 'ks'
##
## The following object is masked from 'package:TDA':
##
## kde
##
## The following object is masked from 'package:MCMCpack':
##
## vech
##
## The following object is masked from 'package:igraph':
##
## compare
##
## The following object is masked from 'package:BayesFactor':
##
## compare
#install.packages('MLmetrics')
library(MLmetrics)
##
## Attaching package: 'MLmetrics'
##
## The following objects are masked from 'package:caret':
##
## MAE, RMSE
##
## The following object is masked from 'package:base':
##
## Recall
#install.packages('googledrive')
library(googledrive)
#install.packages('stringr')
library(stringr)
#install.packages('ks')
library(ks)
library(GGally)
## Registered S3 method overwritten by 'GGally':
## method from
## +.gg ggplot2
# Import the UCI Adult dataset from a local copy stored on the desktop.
# The file is read with header = FALSE, so read.csv() assigns the default
# column names V1..V15.
# NOTE(review): the character fields keep a leading space (e.g. " State-gov").
# strip.white = TRUE would remove it, but it would also change the one-hot
# column names and positions used further down, so it is left as is.
adult <- read.csv("~/Desktop/NCU/DissertationDatasets/Adult/adult.data", header=FALSE)
# str() prints the structure itself and returns NULL invisibly; wrapping it in
# head() only printed an extra "NULL", so it is called directly.
str(adult)
## 'data.frame': 32561 obs. of 15 variables:
## $ V1 : int 39 50 38 53 28 37 49 52 31 42 ...
## $ V2 : chr " State-gov" " Self-emp-not-inc" " Private" " Private" ...
## $ V3 : int 77516 83311 215646 234721 338409 284582 160187 209642 45781 159449 ...
## $ V4 : chr " Bachelors" " Bachelors" " HS-grad" " 11th" ...
## $ V5 : int 13 13 9 7 13 14 5 9 14 13 ...
## $ V6 : chr " Never-married" " Married-civ-spouse" " Divorced" " Married-civ-spouse" ...
## $ V7 : chr " Adm-clerical" " Exec-managerial" " Handlers-cleaners" " Handlers-cleaners" ...
## $ V8 : chr " Not-in-family" " Husband" " Not-in-family" " Husband" ...
## $ V9 : chr " White" " White" " White" " Black" ...
## $ V10: chr " Male" " Male" " Male" " Male" ...
## $ V11: int 2174 0 0 0 0 0 0 0 14084 5178 ...
## $ V12: int 0 0 0 0 0 0 0 0 0 0 ...
## $ V13: int 40 13 40 40 40 40 16 45 50 40 ...
## $ V14: chr " United-States" " United-States" " United-States" " United-States" ...
## $ V15: chr " <=50K" " <=50K" " <=50K" " <=50K" ...
## NULL
# Per-column summary of the raw data: min/quartiles/mean/max for the integer
# columns and length/class/mode for the character columns.
summary(adult)
## V1 V2 V3 V4
## Min. :17.00 Length:32561 Min. : 12285 Length:32561
## 1st Qu.:28.00 Class :character 1st Qu.: 117827 Class :character
## Median :37.00 Mode :character Median : 178356 Mode :character
## Mean :38.58 Mean : 189778
## 3rd Qu.:48.00 3rd Qu.: 237051
## Max. :90.00 Max. :1484705
## V5 V6 V7 V8
## Min. : 1.00 Length:32561 Length:32561 Length:32561
## 1st Qu.: 9.00 Class :character Class :character Class :character
## Median :10.00 Mode :character Mode :character Mode :character
## Mean :10.08
## 3rd Qu.:12.00
## Max. :16.00
## V9 V10 V11 V12
## Length:32561 Length:32561 Min. : 0 Min. : 0.0
## Class :character Class :character 1st Qu.: 0 1st Qu.: 0.0
## Mode :character Mode :character Median : 0 Median : 0.0
## Mean : 1078 Mean : 87.3
## 3rd Qu.: 0 3rd Qu.: 0.0
## Max. :99999 Max. :4356.0
## V13 V14 V15
## Min. : 1.00 Length:32561 Length:32561
## 1st Qu.:40.00 Class :character Class :character
## Median :40.00 Mode :character Mode :character
## Mean :40.44
## 3rd Qu.:45.00
## Max. :99.00
# Pairwise plot matrix of the six integer columns (V1, V3, V5, V11, V12, V13)
# together with the character label column V15.
ggpairs(adult[,c(1,3,5,11,12,13,15)])
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

# Same seven columns, this time with the color aesthetic mapped to the label
# column V15.
ggpairs(adult, columns = c(1,3,5,11,12,13,15), aes(color = V15))
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

##Add Bayesian tests functions
#create function to conduct the Bayesian Sign Test
BayesianSignTest <- function(diffVector, rope_min, rope_max) {
  # Sign-test style summary of a vector of differences against a region of
  # practical equivalence (rope).
  #
  # Arguments:
  #   diffVector: numeric vector of differences.
  #   rope_min:   lower bound of the rope.
  #   rope_max:   upper bound of the rope (must not be below rope_min).
  #
  # Returns a list with elements "probLeft", "probRope" and "probRight": the
  # fraction of observations (including one pseudo-observation at 0) that lie
  # below the rope, inside it, and above it. The three values sum to 1.
  #
  # No posterior sampling is performed here: the result is computed directly
  # from the observed fractions, so the sampling settings the earlier version
  # defined (and never used) and its MCMCpack dependency are dropped.
  if (rope_max < rope_min) {
    stop("rope_max should be larger than rope_min")
  }

  # Add the fake first observation at 0.
  diffVector <- c(0, diffVector)

  probLeft <- mean(diffVector < rope_min)
  probRight <- mean(diffVector > rope_max)
  # Values lying exactly on a rope bound count as inside the rope. With strict
  # inequalities on both sides they belonged to no region at all and the three
  # probabilities did not add up to 1.
  probRope <- mean(diffVector >= rope_min & diffVector <= rope_max)

  list("probLeft" = probLeft, "probRope" = probRope, "probRight" = probRight)
}
##Create function to conduct Bayesian Signed Rank Test
BayesianSignedRank <- function(diffVector, rope_min, rope_max, samples = 30000) {
  # Signed-rank style comparison of a vector of differences against a region
  # of practical equivalence (rope), using Dirichlet-sampled weights.
  #
  # Arguments:
  #   diffVector: numeric vector of differences (no missing values).
  #   rope_min:   lower bound of the rope.
  #   rope_max:   upper bound of the rope (must not be below rope_min).
  #   samples:    number of Dirichlet weight vectors to draw (default 30000,
  #               the value that used to be hard-coded).
  #
  # For every sampled weight vector w, the weight products w[i] * w[j] of all
  # pairs (i, j) are summed per region, where the region of a pair is decided
  # by diffVector[i] + diffVector[j]:
  #   > 2 * rope_max                      -> right
  #   > 2 * rope_min (and not right)      -> rope
  #   otherwise                           -> left
  # The region with the largest mass wins that sample; ties share the win
  # equally.
  #
  # Returns a list with elements "winLeft", "winRope" and "winRight": the
  # average share of wins of each region over all samples.
  if (rope_max < rope_min) {
    stop("rope_max should be larger than rope_min")
  }
  if (anyNA(diffVector)) {
    stop("diffVector must not contain missing values")
  }

  # Dirichlet parameters 0.5 1 1 ... 1: weight 0.5 for the pseudo-observation,
  # 1 for each real observation.
  weights <- c(0.5, rep(1, length(diffVector)))
  # Add the fake first observation at 0.
  diffVector <- c(0, diffVector)

  # The region of each pair depends only on the data, not on the sampled
  # weights, so it is classified once instead of once per sample.
  pairSums <- outer(diffVector, diffVector, "+")
  inRight <- pairSums > 2 * rope_max
  inRope <- !inRight & pairSums > 2 * rope_min
  inLeft <- !(inRight | inRope)

  # One row per sample, one column per (pseudo-)observation.
  sampledWeights <- MCMCpack::rdirichlet(samples, weights)

  # sum_{i,j} w[i] * w[j] * indicator[i, j] for every row w of sampledWeights.
  regionMass <- function(indicator) {
    rowSums((sampledWeights %*% (indicator * 1)) * sampledWeights)
  }
  massRight <- regionMass(inRight)
  massRope <- regionMass(inRope)
  massLeft <- regionMass(inLeft)

  maxMass <- pmax(massRight, massRope, massLeft)
  rightWins <- massRight == maxMass
  ropeWins <- massRope == maxMass
  leftWins <- massLeft == maxMass
  # Number of regions sharing the maximum (normally 1); used to split ties.
  winners <- rightWins + ropeWins + leftWins

  list("winLeft" = mean(leftWins / winners),
       "winRope" = mean(ropeWins / winners),
       "winRight" = mean(rightWins / winners))
}
#Create function to conduct the Bayesian Correlated t.test
#diff_a_b is a vector of differences between the two classifiers, on each fold of cross-validation.
#If you have done 10 runs of 10-folds cross-validation, you have 100 results for each classifier.
#You should have run cross-validation on the same folds for the two classifiers.
#Then diff_a_b is the difference fold-by-fold.
#rho is the correlation of the cross-validation results: 1/(number of folds)
#rope_min and rope_max are the lower and the upper bound of the rope
correlatedBayesianTtest <- function(diff_a_b, rho, rope_min, rope_max) {
  # Correlated t-test on fold-by-fold differences between two classifiers.
  #
  # Arguments:
  #   diff_a_b: numeric vector of differences (at least two values).
  #   rho:      correlation of the cross-validation results; must be below 1
  #             because the scale uses rho / (1 - rho).
  #   rope_min: lower bound of the rope.
  #   rope_max: upper bound of the rope (must not be below rope_min).
  #
  # Returns a list with elements 'left', 'rope' and 'right': the probability
  # mass of a Student t distribution (n - 1 degrees of freedom, centered at
  # mean(diff_a_b), scale sd(diff_a_b) * sqrt(1/n + rho/(1 - rho))) that lies
  # below rope_min, between rope_min and rope_max, and above rope_max.
  if (rope_max < rope_min) {
    stop("rope_max should be larger than rope_min")
  }
  n <- length(diff_a_b)
  if (n < 2) {
    stop("diff_a_b should contain at least two differences")
  }
  if (rho >= 1) {
    stop("rho should be smaller than 1")
  }

  delta <- mean(diff_a_b)
  df <- n - 1
  sp <- sd(diff_a_b) * sqrt(1 / n + rho / (1 - rho))

  # Zero spread (all differences identical): the t statistic would be x / 0,
  # which is NaN whenever delta sits exactly on a rope bound. All mass is at
  # delta, so assign it to the region that contains delta.
  if (isTRUE(sp == 0)) {
    p.left <- as.numeric(delta < rope_min)
    p.right <- as.numeric(delta > rope_max)
    return(list('left' = p.left, 'rope' = 1 - p.left - p.right, 'right' = p.right))
  }

  p.left <- pt((rope_min - delta) / sp, df)
  p.rope <- pt((rope_max - delta) / sp, df) - p.left
  list('left' = p.left, 'rope' = p.rope, 'right' = 1 - p.left - p.rope)
}
# Fix the random number generator state so that later random draws (for
# example the 1000-row sample taken further down) are reproducible.
set.seed(16974)
###Prepare datasets for One hot encoding if necessary and Persistent homology of each dataset.
##One hot encoding for adult dataset
library(caret)
# Define the one-hot encoder. The formula " ~ ." covers every column of
# `adult`, so the label column V15 is encoded as well.
# NOTE(review): the encoded frame therefore carries the income label as its
# last two indicator columns; confirm this is intended wherever the full frame
# is used as model or filter input.
dummy.adult <- dummyVars(" ~ .", data=adult)
# Perform one-hot encoding on the data frame.
adult.one_hot_df <- data.frame(predict(dummy.adult, newdata=adult))
# Show the structure of the encoded frame. str() prints and returns NULL
# invisibly, so the former head() wrapper only printed an extra "NULL".
str(adult.one_hot_df)
## 'data.frame': 32561 obs. of 110 variables:
## $ V1 : num 39 50 38 53 28 37 49 52 31 42 ...
## $ V2.. : num 0 0 0 0 0 0 0 0 0 0 ...
## $ V2.Federal.gov : num 0 0 0 0 0 0 0 0 0 0 ...
## $ V2.Local.gov : num 0 0 0 0 0 0 0 0 0 0 ...
## $ V2.Never.worked : num 0 0 0 0 0 0 0 0 0 0 ...
## $ V2.Private : num 0 0 1 1 1 1 1 0 1 1 ...
## $ V2.Self.emp.inc : num 0 0 0 0 0 0 0 0 0 0 ...
## $ V2.Self.emp.not.inc : num 0 1 0 0 0 0 0 1 0 0 ...
## $ V2.State.gov : num 1 0 0 0 0 0 0 0 0 0 ...
## $ V2.Without.pay : num 0 0 0 0 0 0 0 0 0 0 ...
## $ V3 : num 77516 83311 215646 234721 338409 ...
## $ V4.10th : num 0 0 0 0 0 0 0 0 0 0 ...
## $ V4.11th : num 0 0 0 1 0 0 0 0 0 0 ...
## $ V4.12th : num 0 0 0 0 0 0 0 0 0 0 ...
## $ V4.1st.4th : num 0 0 0 0 0 0 0 0 0 0 ...
## $ V4.5th.6th : num 0 0 0 0 0 0 0 0 0 0 ...
## $ V4.7th.8th : num 0 0 0 0 0 0 0 0 0 0 ...
## $ V4.9th : num 0 0 0 0 0 0 1 0 0 0 ...
## $ V4.Assoc.acdm : num 0 0 0 0 0 0 0 0 0 0 ...
## $ V4.Assoc.voc : num 0 0 0 0 0 0 0 0 0 0 ...
## $ V4.Bachelors : num 1 1 0 0 1 0 0 0 0 1 ...
## $ V4.Doctorate : num 0 0 0 0 0 0 0 0 0 0 ...
## $ V4.HS.grad : num 0 0 1 0 0 0 0 1 0 0 ...
## $ V4.Masters : num 0 0 0 0 0 1 0 0 1 0 ...
## $ V4.Preschool : num 0 0 0 0 0 0 0 0 0 0 ...
## $ V4.Prof.school : num 0 0 0 0 0 0 0 0 0 0 ...
## $ V4.Some.college : num 0 0 0 0 0 0 0 0 0 0 ...
## $ V5 : num 13 13 9 7 13 14 5 9 14 13 ...
## $ V6.Divorced : num 0 0 1 0 0 0 0 0 0 0 ...
## $ V6.Married.AF.spouse : num 0 0 0 0 0 0 0 0 0 0 ...
## $ V6.Married.civ.spouse : num 0 1 0 1 1 1 0 1 0 1 ...
## $ V6.Married.spouse.absent : num 0 0 0 0 0 0 1 0 0 0 ...
## $ V6.Never.married : num 1 0 0 0 0 0 0 0 1 0 ...
## $ V6.Separated : num 0 0 0 0 0 0 0 0 0 0 ...
## $ V6.Widowed : num 0 0 0 0 0 0 0 0 0 0 ...
## $ V7.. : num 0 0 0 0 0 0 0 0 0 0 ...
## $ V7.Adm.clerical : num 1 0 0 0 0 0 0 0 0 0 ...
## $ V7.Armed.Forces : num 0 0 0 0 0 0 0 0 0 0 ...
## $ V7.Craft.repair : num 0 0 0 0 0 0 0 0 0 0 ...
## $ V7.Exec.managerial : num 0 1 0 0 0 1 0 1 0 1 ...
## $ V7.Farming.fishing : num 0 0 0 0 0 0 0 0 0 0 ...
## $ V7.Handlers.cleaners : num 0 0 1 1 0 0 0 0 0 0 ...
## $ V7.Machine.op.inspct : num 0 0 0 0 0 0 0 0 0 0 ...
## $ V7.Other.service : num 0 0 0 0 0 0 1 0 0 0 ...
## $ V7.Priv.house.serv : num 0 0 0 0 0 0 0 0 0 0 ...
## $ V7.Prof.specialty : num 0 0 0 0 1 0 0 0 1 0 ...
## $ V7.Protective.serv : num 0 0 0 0 0 0 0 0 0 0 ...
## $ V7.Sales : num 0 0 0 0 0 0 0 0 0 0 ...
## $ V7.Tech.support : num 0 0 0 0 0 0 0 0 0 0 ...
## $ V7.Transport.moving : num 0 0 0 0 0 0 0 0 0 0 ...
## $ V8.Husband : num 0 1 0 1 0 0 0 1 0 1 ...
## $ V8.Not.in.family : num 1 0 1 0 0 0 1 0 1 0 ...
## $ V8.Other.relative : num 0 0 0 0 0 0 0 0 0 0 ...
## $ V8.Own.child : num 0 0 0 0 0 0 0 0 0 0 ...
## $ V8.Unmarried : num 0 0 0 0 0 0 0 0 0 0 ...
## $ V8.Wife : num 0 0 0 0 1 1 0 0 0 0 ...
## $ V9.Amer.Indian.Eskimo : num 0 0 0 0 0 0 0 0 0 0 ...
## $ V9.Asian.Pac.Islander : num 0 0 0 0 0 0 0 0 0 0 ...
## $ V9.Black : num 0 0 0 1 1 0 1 0 0 0 ...
## $ V9.Other : num 0 0 0 0 0 0 0 0 0 0 ...
## $ V9.White : num 1 1 1 0 0 1 0 1 1 1 ...
## $ V10.Female : num 0 0 0 0 1 1 1 0 1 0 ...
## $ V10.Male : num 1 1 1 1 0 0 0 1 0 1 ...
## $ V11 : num 2174 0 0 0 0 ...
## $ V12 : num 0 0 0 0 0 0 0 0 0 0 ...
## $ V13 : num 40 13 40 40 40 40 16 45 50 40 ...
## $ V14.. : num 0 0 0 0 0 0 0 0 0 0 ...
## $ V14.Cambodia : num 0 0 0 0 0 0 0 0 0 0 ...
## $ V14.Canada : num 0 0 0 0 0 0 0 0 0 0 ...
## $ V14.China : num 0 0 0 0 0 0 0 0 0 0 ...
## $ V14.Columbia : num 0 0 0 0 0 0 0 0 0 0 ...
## $ V14.Cuba : num 0 0 0 0 1 0 0 0 0 0 ...
## $ V14.Dominican.Republic : num 0 0 0 0 0 0 0 0 0 0 ...
## $ V14.Ecuador : num 0 0 0 0 0 0 0 0 0 0 ...
## $ V14.El.Salvador : num 0 0 0 0 0 0 0 0 0 0 ...
## $ V14.England : num 0 0 0 0 0 0 0 0 0 0 ...
## $ V14.France : num 0 0 0 0 0 0 0 0 0 0 ...
## $ V14.Germany : num 0 0 0 0 0 0 0 0 0 0 ...
## $ V14.Greece : num 0 0 0 0 0 0 0 0 0 0 ...
## $ V14.Guatemala : num 0 0 0 0 0 0 0 0 0 0 ...
## $ V14.Haiti : num 0 0 0 0 0 0 0 0 0 0 ...
## $ V14.Holand.Netherlands : num 0 0 0 0 0 0 0 0 0 0 ...
## $ V14.Honduras : num 0 0 0 0 0 0 0 0 0 0 ...
## $ V14.Hong : num 0 0 0 0 0 0 0 0 0 0 ...
## $ V14.Hungary : num 0 0 0 0 0 0 0 0 0 0 ...
## $ V14.India : num 0 0 0 0 0 0 0 0 0 0 ...
## $ V14.Iran : num 0 0 0 0 0 0 0 0 0 0 ...
## $ V14.Ireland : num 0 0 0 0 0 0 0 0 0 0 ...
## $ V14.Italy : num 0 0 0 0 0 0 0 0 0 0 ...
## $ V14.Jamaica : num 0 0 0 0 0 0 1 0 0 0 ...
## $ V14.Japan : num 0 0 0 0 0 0 0 0 0 0 ...
## $ V14.Laos : num 0 0 0 0 0 0 0 0 0 0 ...
## $ V14.Mexico : num 0 0 0 0 0 0 0 0 0 0 ...
## $ V14.Nicaragua : num 0 0 0 0 0 0 0 0 0 0 ...
## $ V14.Outlying.US.Guam.USVI.etc.: num 0 0 0 0 0 0 0 0 0 0 ...
## $ V14.Peru : num 0 0 0 0 0 0 0 0 0 0 ...
## $ V14.Philippines : num 0 0 0 0 0 0 0 0 0 0 ...
## $ V14.Poland : num 0 0 0 0 0 0 0 0 0 0 ...
## $ V14.Portugal : num 0 0 0 0 0 0 0 0 0 0 ...
## [list output truncated]
## NULL
# Label column (V15) of the raw data, appended to the one-hot frame as an
# extra last column named adult_df1.
adult_df1<-adult[,15]
adult.one_hot_df1<-cbind(adult.one_hot_df,adult_df1)
# Columns 1, 11, 28, 64, 65, 66 are V1, V3, V5, V11, V12 and V13.
# NOTE(review): these three assignments are repeated in the Mapper section
# below, where adult.one_hot_df2 is overwritten with a different column set.
adult.one_hot_df2<-adult.one_hot_df1[,c(1,11,28,64,65,66)]
##Persistent homology of adult dataset
# Draw 1000 rows without replacement from the one-hot frame, to check whether
# a barcode and persistence diagram can be computed at that size.
adult.one_hot_1000_df <- adult.one_hot_df[sample(nrow(adult.one_hot_df), size = 1000, replace = FALSE), ]
# str() prints and returns NULL invisibly, so it is called without head().
str(adult.one_hot_1000_df)
## 'data.frame': 1000 obs. of 110 variables:
## $ V1 : num 33 25 39 21 32 26 20 58 24 63 ...
## $ V2.. : num 0 0 0 0 0 0 1 1 0 0 ...
## $ V2.Federal.gov : num 0 0 0 0 0 0 0 0 0 0 ...
## $ V2.Local.gov : num 0 0 0 0 0 0 0 0 0 0 ...
## $ V2.Never.worked : num 0 0 0 0 0 0 0 0 0 0 ...
## $ V2.Private : num 1 1 1 1 1 1 0 0 1 0 ...
## $ V2.Self.emp.inc : num 0 0 0 0 0 0 0 0 0 0 ...
## $ V2.Self.emp.not.inc : num 0 0 0 0 0 0 0 0 0 1 ...
## $ V2.State.gov : num 0 0 0 0 0 0 0 0 0 0 ...
## $ V2.Without.pay : num 0 0 0 0 0 0 0 0 0 0 ...
## $ V3 : num 176992 105693 234901 198050 134886 ...
## $ V4.10th : num 0 0 0 0 0 0 0 0 0 0 ...
## $ V4.11th : num 0 0 0 0 0 0 0 1 0 0 ...
## $ V4.12th : num 0 0 0 0 0 0 0 0 0 0 ...
## $ V4.1st.4th : num 0 0 0 0 0 0 0 0 0 0 ...
## $ V4.5th.6th : num 0 0 0 0 0 0 0 0 0 0 ...
## $ V4.7th.8th : num 0 0 0 0 0 0 0 0 0 0 ...
## $ V4.9th : num 0 0 0 0 0 0 0 0 0 0 ...
## $ V4.Assoc.acdm : num 0 0 1 1 0 0 0 0 0 0 ...
## $ V4.Assoc.voc : num 0 0 0 0 0 0 0 0 0 0 ...
## $ V4.Bachelors : num 0 1 0 0 0 1 0 0 0 0 ...
## $ V4.Doctorate : num 0 0 0 0 0 0 0 0 0 0 ...
## $ V4.HS.grad : num 0 0 0 0 1 0 0 0 1 0 ...
## $ V4.Masters : num 1 0 0 0 0 0 0 0 0 0 ...
## $ V4.Preschool : num 0 0 0 0 0 0 0 0 0 0 ...
## $ V4.Prof.school : num 0 0 0 0 0 0 0 0 0 0 ...
## $ V4.Some.college : num 0 0 0 0 0 0 1 0 0 1 ...
## $ V5 : num 14 13 12 12 9 13 10 7 9 10 ...
## $ V6.Divorced : num 0 0 0 0 0 0 0 0 0 0 ...
## $ V6.Married.AF.spouse : num 0 0 0 0 0 0 0 0 0 0 ...
## $ V6.Married.civ.spouse : num 1 0 0 0 1 0 0 1 0 1 ...
## $ V6.Married.spouse.absent : num 0 0 0 0 0 0 0 0 0 0 ...
## $ V6.Never.married : num 0 1 0 1 0 1 1 0 1 0 ...
## $ V6.Separated : num 0 0 1 0 0 0 0 0 0 0 ...
## $ V6.Widowed : num 0 0 0 0 0 0 0 0 0 0 ...
## $ V7.. : num 0 0 0 0 0 0 1 1 0 0 ...
## $ V7.Adm.clerical : num 0 0 1 1 1 0 0 0 0 0 ...
## $ V7.Armed.Forces : num 0 0 0 0 0 0 0 0 0 0 ...
## $ V7.Craft.repair : num 0 0 0 0 0 0 0 0 0 0 ...
## $ V7.Exec.managerial : num 0 0 0 0 0 0 0 0 0 0 ...
## $ V7.Farming.fishing : num 0 0 0 0 0 0 0 0 0 0 ...
## $ V7.Handlers.cleaners : num 0 0 0 0 0 0 0 0 0 0 ...
## $ V7.Machine.op.inspct : num 0 0 0 0 0 0 0 0 0 0 ...
## $ V7.Other.service : num 0 0 0 0 0 0 0 0 1 1 ...
## $ V7.Priv.house.serv : num 0 0 0 0 0 0 0 0 0 0 ...
## $ V7.Prof.specialty : num 1 1 0 0 0 1 0 0 0 0 ...
## $ V7.Protective.serv : num 0 0 0 0 0 0 0 0 0 0 ...
## $ V7.Sales : num 0 0 0 0 0 0 0 0 0 0 ...
## $ V7.Tech.support : num 0 0 0 0 0 0 0 0 0 0 ...
## $ V7.Transport.moving : num 0 0 0 0 0 0 0 0 0 0 ...
## $ V8.Husband : num 1 0 0 0 0 0 0 1 0 1 ...
## $ V8.Not.in.family : num 0 1 0 1 0 1 0 0 0 0 ...
## $ V8.Other.relative : num 0 0 0 0 0 0 0 0 0 0 ...
## $ V8.Own.child : num 0 0 0 0 0 0 0 0 1 0 ...
## $ V8.Unmarried : num 0 0 1 0 0 0 1 0 0 0 ...
## $ V8.Wife : num 0 0 0 0 1 0 0 0 0 0 ...
## $ V9.Amer.Indian.Eskimo : num 0 0 0 0 0 0 0 0 0 0 ...
## $ V9.Asian.Pac.Islander : num 0 0 0 0 0 0 0 0 0 0 ...
## $ V9.Black : num 0 0 0 0 0 0 0 0 0 0 ...
## $ V9.Other : num 0 0 0 0 0 0 0 0 0 0 ...
## $ V9.White : num 1 1 1 1 1 1 1 1 1 1 ...
## $ V10.Female : num 0 1 0 1 1 1 1 0 1 0 ...
## $ V10.Male : num 1 0 1 0 0 0 0 1 0 1 ...
## $ V11 : num 0 0 0 0 0 0 0 0 0 0 ...
## $ V12 : num 0 0 0 0 0 0 0 0 0 0 ...
## $ V13 : num 40 40 40 25 40 40 20 16 25 48 ...
## $ V14.. : num 0 0 0 0 0 0 0 0 0 0 ...
## $ V14.Cambodia : num 0 0 0 0 0 0 0 0 0 0 ...
## $ V14.Canada : num 0 0 0 0 0 0 0 0 0 0 ...
## $ V14.China : num 0 0 0 0 0 0 0 0 0 0 ...
## $ V14.Columbia : num 0 0 0 0 0 0 0 0 0 0 ...
## $ V14.Cuba : num 0 0 0 0 0 0 0 0 0 0 ...
## $ V14.Dominican.Republic : num 0 0 0 0 0 0 0 0 0 0 ...
## $ V14.Ecuador : num 0 0 0 0 0 0 0 0 0 0 ...
## $ V14.El.Salvador : num 0 0 0 0 0 0 0 0 0 0 ...
## $ V14.England : num 0 0 0 0 0 0 0 0 0 0 ...
## $ V14.France : num 0 0 0 0 0 0 0 0 0 0 ...
## $ V14.Germany : num 0 0 0 0 0 0 0 0 0 0 ...
## $ V14.Greece : num 0 0 0 0 0 0 0 0 0 0 ...
## $ V14.Guatemala : num 0 0 0 0 0 0 0 0 0 0 ...
## $ V14.Haiti : num 0 0 0 0 0 0 0 0 0 0 ...
## $ V14.Holand.Netherlands : num 0 0 0 0 0 0 0 0 0 0 ...
## $ V14.Honduras : num 0 0 0 0 0 0 0 0 0 0 ...
## $ V14.Hong : num 0 0 0 0 0 0 0 0 0 0 ...
## $ V14.Hungary : num 0 0 0 0 0 0 0 0 0 0 ...
## $ V14.India : num 0 0 0 0 0 0 0 0 0 0 ...
## $ V14.Iran : num 0 0 0 0 0 0 0 0 0 0 ...
## $ V14.Ireland : num 0 0 0 0 0 0 0 0 0 0 ...
## $ V14.Italy : num 0 0 0 0 0 0 0 0 0 0 ...
## $ V14.Jamaica : num 0 0 0 0 0 0 0 0 0 0 ...
## $ V14.Japan : num 0 0 0 0 0 0 0 0 0 0 ...
## $ V14.Laos : num 0 0 0 0 0 0 0 0 0 0 ...
## $ V14.Mexico : num 0 0 0 0 0 0 0 0 0 0 ...
## $ V14.Nicaragua : num 0 0 0 0 0 0 0 0 0 0 ...
## $ V14.Outlying.US.Guam.USVI.etc.: num 0 0 0 0 0 0 0 0 0 0 ...
## $ V14.Peru : num 0 0 0 0 0 0 0 0 0 0 ...
## $ V14.Philippines : num 0 0 0 0 0 0 0 0 0 0 ...
## $ V14.Poland : num 0 0 0 0 0 0 0 0 0 0 ...
## $ V14.Portugal : num 0 0 0 0 0 0 0 0 0 0 ...
## [list output truncated]
## NULL
# Per-column summary of the 1000-row sample. An indicator column whose Max. is
# 0 has no occurrence of that level in the sample.
summary(adult.one_hot_1000_df)
## V1 V2.. V2.Federal.gov V2.Local.gov
## Min. :17.00 Min. :0.000 Min. :0.000 Min. :0.000
## 1st Qu.:28.00 1st Qu.:0.000 1st Qu.:0.000 1st Qu.:0.000
## Median :37.00 Median :0.000 Median :0.000 Median :0.000
## Mean :38.64 Mean :0.077 Mean :0.025 Mean :0.064
## 3rd Qu.:47.00 3rd Qu.:0.000 3rd Qu.:0.000 3rd Qu.:0.000
## Max. :90.00 Max. :1.000 Max. :1.000 Max. :1.000
## V2.Never.worked V2.Private V2.Self.emp.inc V2.Self.emp.not.inc
## Min. :0 Min. :0.000 Min. :0.000 Min. :0.000
## 1st Qu.:0 1st Qu.:0.000 1st Qu.:0.000 1st Qu.:0.000
## Median :0 Median :1.000 Median :0.000 Median :0.000
## Mean :0 Mean :0.679 Mean :0.037 Mean :0.079
## 3rd Qu.:0 3rd Qu.:1.000 3rd Qu.:0.000 3rd Qu.:0.000
## Max. :0 Max. :1.000 Max. :1.000 Max. :1.000
## V2.State.gov V2.Without.pay V3 V4.10th
## Min. :0.000 Min. :0 Min. : 19302 Min. :0.000
## 1st Qu.:0.000 1st Qu.:0 1st Qu.:123797 1st Qu.:0.000
## Median :0.000 Median :0 Median :181982 Median :0.000
## Mean :0.039 Mean :0 Mean :195583 Mean :0.041
## 3rd Qu.:0.000 3rd Qu.:0 3rd Qu.:242529 3rd Qu.:0.000
## Max. :1.000 Max. :0 Max. :721161 Max. :1.000
## V4.11th V4.12th V4.1st.4th V4.5th.6th
## Min. :0.000 Min. :0.000 Min. :0.000 Min. :0.000
## 1st Qu.:0.000 1st Qu.:0.000 1st Qu.:0.000 1st Qu.:0.000
## Median :0.000 Median :0.000 Median :0.000 Median :0.000
## Mean :0.032 Mean :0.015 Mean :0.005 Mean :0.015
## 3rd Qu.:0.000 3rd Qu.:0.000 3rd Qu.:0.000 3rd Qu.:0.000
## Max. :1.000 Max. :1.000 Max. :1.000 Max. :1.000
## V4.7th.8th V4.9th V4.Assoc.acdm V4.Assoc.voc V4.Bachelors
## Min. :0.000 Min. :0.000 Min. :0.00 Min. :0.000 Min. :0.000
## 1st Qu.:0.000 1st Qu.:0.000 1st Qu.:0.00 1st Qu.:0.000 1st Qu.:0.000
## Median :0.000 Median :0.000 Median :0.00 Median :0.000 Median :0.000
## Mean :0.015 Mean :0.018 Mean :0.04 Mean :0.052 Mean :0.155
## 3rd Qu.:0.000 3rd Qu.:0.000 3rd Qu.:0.00 3rd Qu.:0.000 3rd Qu.:0.000
## Max. :1.000 Max. :1.000 Max. :1.00 Max. :1.000 Max. :1.000
## V4.Doctorate V4.HS.grad V4.Masters V4.Preschool
## Min. :0.000 Min. :0.000 Min. :0.000 Min. :0.000
## 1st Qu.:0.000 1st Qu.:0.000 1st Qu.:0.000 1st Qu.:0.000
## Median :0.000 Median :0.000 Median :0.000 Median :0.000
## Mean :0.014 Mean :0.327 Mean :0.053 Mean :0.002
## 3rd Qu.:0.000 3rd Qu.:1.000 3rd Qu.:0.000 3rd Qu.:0.000
## Max. :1.000 Max. :1.000 Max. :1.000 Max. :1.000
## V4.Prof.school V4.Some.college V5 V6.Divorced
## Min. :0.000 Min. :0.000 Min. : 1 Min. :0.000
## 1st Qu.:0.000 1st Qu.:0.000 1st Qu.: 9 1st Qu.:0.000
## Median :0.000 Median :0.000 Median :10 Median :0.000
## Mean :0.014 Mean :0.202 Mean :10 Mean :0.132
## 3rd Qu.:0.000 3rd Qu.:0.000 3rd Qu.:12 3rd Qu.:0.000
## Max. :1.000 Max. :1.000 Max. :16 Max. :1.000
## V6.Married.AF.spouse V6.Married.civ.spouse V6.Married.spouse.absent
## Min. :0 Min. :0.000 Min. :0.000
## 1st Qu.:0 1st Qu.:0.000 1st Qu.:0.000
## Median :0 Median :0.000 Median :0.000
## Mean :0 Mean :0.464 Mean :0.005
## 3rd Qu.:0 3rd Qu.:1.000 3rd Qu.:0.000
## Max. :0 Max. :1.000 Max. :1.000
## V6.Never.married V6.Separated V6.Widowed V7..
## Min. :0.000 Min. :0.000 Min. :0.000 Min. :0.000
## 1st Qu.:0.000 1st Qu.:0.000 1st Qu.:0.000 1st Qu.:0.000
## Median :0.000 Median :0.000 Median :0.000 Median :0.000
## Mean :0.325 Mean :0.041 Mean :0.033 Mean :0.077
## 3rd Qu.:1.000 3rd Qu.:0.000 3rd Qu.:0.000 3rd Qu.:0.000
## Max. :1.000 Max. :1.000 Max. :1.000 Max. :1.000
## V7.Adm.clerical V7.Armed.Forces V7.Craft.repair V7.Exec.managerial
## Min. :0.000 Min. :0.000 Min. :0.000 Min. :0.000
## 1st Qu.:0.000 1st Qu.:0.000 1st Qu.:0.000 1st Qu.:0.000
## Median :0.000 Median :0.000 Median :0.000 Median :0.000
## Mean :0.117 Mean :0.001 Mean :0.129 Mean :0.124
## 3rd Qu.:0.000 3rd Qu.:0.000 3rd Qu.:0.000 3rd Qu.:0.000
## Max. :1.000 Max. :1.000 Max. :1.000 Max. :1.000
## V7.Farming.fishing V7.Handlers.cleaners V7.Machine.op.inspct V7.Other.service
## Min. :0.000 Min. :0.000 Min. :0.000 Min. :0.000
## 1st Qu.:0.000 1st Qu.:0.000 1st Qu.:0.000 1st Qu.:0.000
## Median :0.000 Median :0.000 Median :0.000 Median :0.000
## Mean :0.029 Mean :0.041 Mean :0.071 Mean :0.091
## 3rd Qu.:0.000 3rd Qu.:0.000 3rd Qu.:0.000 3rd Qu.:0.000
## Max. :1.000 Max. :1.000 Max. :1.000 Max. :1.000
## V7.Priv.house.serv V7.Prof.specialty V7.Protective.serv V7.Sales
## Min. :0.000 Min. :0.000 Min. :0.000 Min. :0.000
## 1st Qu.:0.000 1st Qu.:0.000 1st Qu.:0.000 1st Qu.:0.000
## Median :0.000 Median :0.000 Median :0.000 Median :0.000
## Mean :0.003 Mean :0.119 Mean :0.018 Mean :0.102
## 3rd Qu.:0.000 3rd Qu.:0.000 3rd Qu.:0.000 3rd Qu.:0.000
## Max. :1.000 Max. :1.000 Max. :1.000 Max. :1.000
## V7.Tech.support V7.Transport.moving V8.Husband V8.Not.in.family
## Min. :0.000 Min. :0.000 Min. :0.00 Min. :0.000
## 1st Qu.:0.000 1st Qu.:0.000 1st Qu.:0.00 1st Qu.:0.000
## Median :0.000 Median :0.000 Median :0.00 Median :0.000
## Mean :0.035 Mean :0.043 Mean :0.41 Mean :0.261
## 3rd Qu.:0.000 3rd Qu.:0.000 3rd Qu.:1.00 3rd Qu.:1.000
## Max. :1.000 Max. :1.000 Max. :1.00 Max. :1.000
## V8.Other.relative V8.Own.child V8.Unmarried V8.Wife
## Min. :0.000 Min. :0.000 Min. :0.000 Min. :0.000
## 1st Qu.:0.000 1st Qu.:0.000 1st Qu.:0.000 1st Qu.:0.000
## Median :0.000 Median :0.000 Median :0.000 Median :0.000
## Mean :0.027 Mean :0.136 Mean :0.115 Mean :0.051
## 3rd Qu.:0.000 3rd Qu.:0.000 3rd Qu.:0.000 3rd Qu.:0.000
## Max. :1.000 Max. :1.000 Max. :1.000 Max. :1.000
## V9.Amer.Indian.Eskimo V9.Asian.Pac.Islander V9.Black V9.Other
## Min. :0.000 Min. :0.000 Min. :0.000 Min. :0.000
## 1st Qu.:0.000 1st Qu.:0.000 1st Qu.:0.000 1st Qu.:0.000
## Median :0.000 Median :0.000 Median :0.000 Median :0.000
## Mean :0.014 Mean :0.029 Mean :0.104 Mean :0.007
## 3rd Qu.:0.000 3rd Qu.:0.000 3rd Qu.:0.000 3rd Qu.:0.000
## Max. :1.000 Max. :1.000 Max. :1.000 Max. :1.000
## V9.White V10.Female V10.Male V11
## Min. :0.000 Min. :0.000 Min. :0.000 Min. : 0.0
## 1st Qu.:1.000 1st Qu.:0.000 1st Qu.:0.000 1st Qu.: 0.0
## Median :1.000 Median :0.000 Median :1.000 Median : 0.0
## Mean :0.846 Mean :0.339 Mean :0.661 Mean : 868.9
## 3rd Qu.:1.000 3rd Qu.:1.000 3rd Qu.:1.000 3rd Qu.: 0.0
## Max. :1.000 Max. :1.000 Max. :1.000 Max. :99999.0
## V12 V13 V14.. V14.Cambodia V14.Canada
## Min. : 0.00 Min. : 1.0 Min. :0.000 Min. :0 Min. :0.000
## 1st Qu.: 0.00 1st Qu.:40.0 1st Qu.:0.000 1st Qu.:0 1st Qu.:0.000
## Median : 0.00 Median :40.0 Median :0.000 Median :0 Median :0.000
## Mean : 92.56 Mean :40.5 Mean :0.024 Mean :0 Mean :0.003
## 3rd Qu.: 0.00 3rd Qu.:45.0 3rd Qu.:0.000 3rd Qu.:0 3rd Qu.:0.000
## Max. :2457.00 Max. :99.0 Max. :1.000 Max. :0 Max. :1.000
## V14.China V14.Columbia V14.Cuba V14.Dominican.Republic
## Min. :0.000 Min. :0.000 Min. :0.000 Min. :0.000
## 1st Qu.:0.000 1st Qu.:0.000 1st Qu.:0.000 1st Qu.:0.000
## Median :0.000 Median :0.000 Median :0.000 Median :0.000
## Mean :0.003 Mean :0.002 Mean :0.005 Mean :0.002
## 3rd Qu.:0.000 3rd Qu.:0.000 3rd Qu.:0.000 3rd Qu.:0.000
## Max. :1.000 Max. :1.000 Max. :1.000 Max. :1.000
## V14.Ecuador V14.El.Salvador V14.England V14.France
## Min. :0.000 Min. :0.000 Min. :0.000 Min. :0.000
## 1st Qu.:0.000 1st Qu.:0.000 1st Qu.:0.000 1st Qu.:0.000
## Median :0.000 Median :0.000 Median :0.000 Median :0.000
## Mean :0.001 Mean :0.003 Mean :0.003 Mean :0.001
## 3rd Qu.:0.000 3rd Qu.:0.000 3rd Qu.:0.000 3rd Qu.:0.000
## Max. :1.000 Max. :1.000 Max. :1.000 Max. :1.000
## V14.Germany V14.Greece V14.Guatemala V14.Haiti
## Min. :0.000 Min. :0.000 Min. :0 Min. :0.000
## 1st Qu.:0.000 1st Qu.:0.000 1st Qu.:0 1st Qu.:0.000
## Median :0.000 Median :0.000 Median :0 Median :0.000
## Mean :0.002 Mean :0.002 Mean :0 Mean :0.002
## 3rd Qu.:0.000 3rd Qu.:0.000 3rd Qu.:0 3rd Qu.:0.000
## Max. :1.000 Max. :1.000 Max. :0 Max. :1.000
## V14.Holand.Netherlands V14.Honduras V14.Hong V14.Hungary
## Min. :0 Min. :0 Min. :0.000 Min. :0.000
## 1st Qu.:0 1st Qu.:0 1st Qu.:0.000 1st Qu.:0.000
## Median :0 Median :0 Median :0.000 Median :0.000
## Mean :0 Mean :0 Mean :0.001 Mean :0.002
## 3rd Qu.:0 3rd Qu.:0 3rd Qu.:0.000 3rd Qu.:0.000
## Max. :0 Max. :0 Max. :1.000 Max. :1.000
## V14.India V14.Iran V14.Ireland V14.Italy V14.Jamaica
## Min. :0.000 Min. :0 Min. :0 Min. :0.000 Min. :0.000
## 1st Qu.:0.000 1st Qu.:0 1st Qu.:0 1st Qu.:0.000 1st Qu.:0.000
## Median :0.000 Median :0 Median :0 Median :0.000 Median :0.000
## Mean :0.004 Mean :0 Mean :0 Mean :0.003 Mean :0.003
## 3rd Qu.:0.000 3rd Qu.:0 3rd Qu.:0 3rd Qu.:0.000 3rd Qu.:0.000
## Max. :1.000 Max. :0 Max. :0 Max. :1.000 Max. :1.000
## V14.Japan V14.Laos V14.Mexico V14.Nicaragua
## Min. :0.000 Min. :0 Min. :0.000 Min. :0
## 1st Qu.:0.000 1st Qu.:0 1st Qu.:0.000 1st Qu.:0
## Median :0.000 Median :0 Median :0.000 Median :0
## Mean :0.003 Mean :0 Mean :0.022 Mean :0
## 3rd Qu.:0.000 3rd Qu.:0 3rd Qu.:0.000 3rd Qu.:0
## Max. :1.000 Max. :0 Max. :1.000 Max. :0
## V14.Outlying.US.Guam.USVI.etc. V14.Peru V14.Philippines V14.Poland
## Min. :0 Min. :0 Min. :0.000 Min. :0.000
## 1st Qu.:0 1st Qu.:0 1st Qu.:0.000 1st Qu.:0.000
## Median :0 Median :0 Median :0.000 Median :0.000
## Mean :0 Mean :0 Mean :0.004 Mean :0.002
## 3rd Qu.:0 3rd Qu.:0 3rd Qu.:0.000 3rd Qu.:0.000
## Max. :0 Max. :0 Max. :1.000 Max. :1.000
## V14.Portugal V14.Puerto.Rico V14.Scotland V14.South V14.Taiwan
## Min. :0.000 Min. :0.000 Min. :0 Min. :0.000 Min. :0.000
## 1st Qu.:0.000 1st Qu.:0.000 1st Qu.:0 1st Qu.:0.000 1st Qu.:0.000
## Median :0.000 Median :0.000 Median :0 Median :0.000 Median :0.000
## Mean :0.001 Mean :0.004 Mean :0 Mean :0.001 Mean :0.001
## 3rd Qu.:0.000 3rd Qu.:0.000 3rd Qu.:0 3rd Qu.:0.000 3rd Qu.:0.000
## Max. :1.000 Max. :1.000 Max. :0 Max. :1.000 Max. :1.000
## V14.Thailand V14.Trinadad.Tobago V14.United.States V14.Vietnam
## Min. :0 Min. :0.000 Min. :0.000 Min. :0.000
## 1st Qu.:0 1st Qu.:0.000 1st Qu.:1.000 1st Qu.:0.000
## Median :0 Median :0.000 Median :1.000 Median :0.000
## Mean :0 Mean :0.002 Mean :0.891 Mean :0.003
## 3rd Qu.:0 3rd Qu.:0.000 3rd Qu.:1.000 3rd Qu.:0.000
## Max. :0 Max. :1.000 Max. :1.000 Max. :1.000
## V14.Yugoslavia V15...50K V15..50K
## Min. :0 Min. :0.000 Min. :0.000
## 1st Qu.:0 1st Qu.:1.000 1st Qu.:0.000
## Median :0 Median :1.000 Median :0.000
## Mean :0 Mean :0.769 Mean :0.231
## 3rd Qu.:0 3rd Qu.:1.000 3rd Qu.:0.000
## Max. :0 Max. :1.000 Max. :1.000
# calculate persistent homology for adult.one_hot_1000_df
# NOTE(review): the input is the full 110-column one-hot sample, unscaled and
# including the two V15 label indicators; large-valued columns such as V3 will
# presumably dominate the distances. Confirm this is intended.
phom.adult.one_hot_1000_df <- calculate_homology(adult.one_hot_1000_df)
# plot barcode for adult.one_hot_1000_df
plot_barcode(phom.adult.one_hot_1000_df)

# plot persistent diagram of adult.one_hot_1000_df dataset
plot_persist(phom.adult.one_hot_1000_df)

#####———————————————MAPPER ALGORITHM————————————————
# Prepare the Adult data for the Mapper 1D algorithm.
# adult_df1 is column 15 of `adult`; it is appended to the one-hot encoded
# frame as an extra column named adult_df1.
adult_df1 <- adult[, 15]
adult.one_hot_df1 <- cbind(adult.one_hot_df, adult_df1)
# Column subsets of the combined frame, selected by position.
adult.one_hot_df2 <- adult.one_hot_df1[, c(1, 11, 28, 64, 65, 66, 109)]
adult.one_hot_df3 <- adult.one_hot_df1[, c(1, 11, 28, 62, 63, 64, 65, 66)]
# Everything except columns 109 and 110 (adult_df1 is kept).
adult.one_hot_df4 <- adult.one_hot_df1[, -c(109, 110)]

## Two filter functions: PCA and KDE
# Linear PCA filter: centre and scale every column of the one-hot frame, then
# project the same rows onto the principal components.
# `scale.` is written out in full; `scale = TRUE` only reached prcomp() through
# partial argument matching.
b <- prcomp(adult.one_hot_df, center = TRUE, scale. = TRUE)
ts_pca_b <- as.data.frame(predict(b, adult.one_hot_df))
# Kernel density filter: density estimated from the first four columns of
# adult.one_hot_df3 with an identity bandwidth matrix, evaluated at the
# observations themselves.
filter.kde <- kde(
  adult.one_hot_df3[, 1:4],
  H = diag(1, nrow = 4),
  eval.points = adult.one_hot_df3[, 1:4]
)$estimate
## *** Adult PCA Mapper: 5 intervals, 60% overlap, 5 bins
# Filter values are the first principal component; pairwise distances are
# computed on the full one-hot encoded frame.
m_adult_5.60.5 <- mapper1D(
  distance_matrix = dist(adult.one_hot_df),
  filter_values = ts_pca_b$PC1,
  num_intervals = 5,
  percent_overlap = 60,
  num_bins_when_clustering = 5
)
# graph_from_adjacency_matrix() and layout_nicely() are the replacements for
# graph.adjacency() and layout.auto(), both deprecated in igraph 2.0.0.
g_adult_5.60.5 <- graph_from_adjacency_matrix(
  m_adult_5.60.5$adjacency,
  mode = "undirected"
)
plot(g_adult_5.60.5, layout = layout_nicely(g_adult_5.60.5))

# Inspect the Mapper result. str() prints the structure itself and returns
# NULL, so it is called directly: wrapping it in head() only printed an extra
# "NULL" after each structure dump.
str(m_adult_5.60.5$level_of_vertex)
## int [1:5] 1 2 3 4 5
str(m_adult_5.60.5$vertices_in_level)
## List of 5
## $ : num 1
## $ : num 2
## $ : num 3
## $ : num 4
## $ : num 5
str(m_adult_5.60.5$points_in_vertex)
## List of 5
## $ : int [1:6560] 2 8 10 11 12 15 21 26 28 39 ...
## $ : int [1:13933] 2 8 10 11 12 15 19 20 21 23 ...
## $ : int [1:15744] 1 2 3 4 5 6 9 11 15 16 ...
## $ : int [1:19829] 1 3 4 5 6 9 13 14 16 17 ...
## $ : int [1:16508] 1 3 5 7 13 14 17 18 22 25 ...
# Redraw the PCA Mapper graph with each vertex coloured by its level and sized
# by the log of the number of observations it contains.
my_resolution <- 100
my_palette <- colorRampPalette(c("red", "green", "lightblue"))
# Rescale the vertex levels by their maximum, then bin them into
# `my_resolution` colour slots.
my_max <- max(m_adult_5.60.5$level_of_vertex, na.rm = TRUE)
my_vector <- m_adult_5.60.5$level_of_vertex / my_max
my_colors <- my_palette(my_resolution)[
  as.numeric(cut(my_vector, breaks = my_resolution))
]
# graph_from_adjacency_matrix() and layout_nicely() replace the deprecated
# graph.adjacency() and layout.auto().
g_adult_5.60.5 <- graph_from_adjacency_matrix(
  m_adult_5.60.5$adjacency,
  mode = "undirected"
)
# lengths() gives the number of points per vertex in one call.
vertex_size <- lengths(m_adult_5.60.5$points_in_vertex)
plot(
  g_adult_5.60.5,
  layout = layout_nicely(g_adult_5.60.5),
  vertex.size = 30 * log(vertex_size) / max(log(vertex_size)),
  vertex.color = my_colors
)

# Members of each of the five PCA Mapper vertices.
# `.nK` keeps the one-element list slice for vertex K.
m_adult_5.60.5.n1 <- m_adult_5.60.5$points_in_vertex[1]
m_adult_5.60.5.n2 <- m_adult_5.60.5$points_in_vertex[2]
m_adult_5.60.5.n3 <- m_adult_5.60.5$points_in_vertex[3]
m_adult_5.60.5.n4 <- m_adult_5.60.5$points_in_vertex[4]
m_adult_5.60.5.n5 <- m_adult_5.60.5$points_in_vertex[5]
# `.nK.vec` is the same content flattened to a plain, unnamed index vector.
m_adult_5.60.5.n1.vec <- unlist(m_adult_5.60.5.n1, use.names = FALSE)
m_adult_5.60.5.n2.vec <- unlist(m_adult_5.60.5.n2, use.names = FALSE)
m_adult_5.60.5.n3.vec <- unlist(m_adult_5.60.5.n3, use.names = FALSE)
m_adult_5.60.5.n4.vec <- unlist(m_adult_5.60.5.n4, use.names = FALSE)
m_adult_5.60.5.n5.vec <- unlist(m_adult_5.60.5.n5, use.names = FALSE)
## Use each vertex's indices to pull the matching rows of adult.one_hot_df4
tda.m_adult_5.60.5.n1.vec <- adult.one_hot_df4[m_adult_5.60.5.n1.vec, ]
tda.m_adult_5.60.5.n2.vec <- adult.one_hot_df4[m_adult_5.60.5.n2.vec, ]
tda.m_adult_5.60.5.n3.vec <- adult.one_hot_df4[m_adult_5.60.5.n3.vec, ]
tda.m_adult_5.60.5.n4.vec <- adult.one_hot_df4[m_adult_5.60.5.n4.vec, ]
tda.m_adult_5.60.5.n5.vec <- adult.one_hot_df4[m_adult_5.60.5.n5.vec, ]
## *** Adult KDE Mapper: 5 intervals, 60% overlap, 5 bins
# Same distance matrix as the PCA Mapper; the filter values are the kernel
# density estimates in filter.kde.
m_kde_adult_5.60.5 <- mapper1D(
  distance_matrix = dist(adult.one_hot_df),
  filter_values = c(filter.kde),
  num_intervals = 5,
  percent_overlap = 60,
  num_bins_when_clustering = 5
)
# graph_from_adjacency_matrix() and layout_nicely() replace the deprecated
# graph.adjacency() and layout.auto().
g_kde_adult_5.60.5 <- graph_from_adjacency_matrix(
  m_kde_adult_5.60.5$adjacency,
  mode = "undirected"
)
plot(g_kde_adult_5.60.5, layout = layout_nicely(g_kde_adult_5.60.5))

# Inspect the KDE Mapper result. str() prints the structure itself and returns
# NULL, so it is called directly: wrapping it in head() only printed an extra
# "NULL" after each structure dump.
str(m_kde_adult_5.60.5$level_of_vertex)
## int [1:5] 1 2 3 4 5
str(m_kde_adult_5.60.5$vertices_in_level)
## List of 5
## $ : num 1
## $ : num 2
## $ : num 3
## $ : num 4
## $ : num 5
str(m_kde_adult_5.60.5$points_in_vertex)
## List of 5
## $ : int [1:15260] 2 4 5 6 7 9 13 16 19 20 ...
## $ : int [1:14482] 1 2 4 6 8 9 12 13 20 22 ...
## $ : int [1:13266] 1 2 8 10 11 12 13 14 27 28 ...
## $ : int [1:11795] 3 8 10 11 12 14 15 17 27 28 ...
## $ : int [1:8940] 3 15 17 18 27 32 37 39 49 55 ...
# Redraw the KDE Mapper graph with each vertex coloured by its level and sized
# by the log of the number of observations it contains.
my_resolution <- 100
my_palette <- colorRampPalette(c("red", "green", "lightblue"))
# Rescale the vertex levels by their maximum, then bin them into
# `my_resolution` colour slots.
my_max <- max(m_kde_adult_5.60.5$level_of_vertex, na.rm = TRUE)
my_vector <- m_kde_adult_5.60.5$level_of_vertex / my_max
my_colors <- my_palette(my_resolution)[
  as.numeric(cut(my_vector, breaks = my_resolution))
]
# The graph is stored under g_kde_adult_5.60.5, the name the plot call below
# reads; it was previously assigned to the misspelled g_kde_adult_5.50.5.
# graph_from_adjacency_matrix() and layout_nicely() replace the deprecated
# graph.adjacency() and layout.auto().
g_kde_adult_5.60.5 <- graph_from_adjacency_matrix(
  m_kde_adult_5.60.5$adjacency,
  mode = "undirected"
)
# lengths() gives the number of points per vertex in one call.
vertex_size <- lengths(m_kde_adult_5.60.5$points_in_vertex)
plot(
  g_kde_adult_5.60.5,
  layout = layout_nicely(g_kde_adult_5.60.5),
  vertex.size = 30 * log(vertex_size) / max(log(vertex_size)),
  vertex.color = my_colors
)

## Members of each of the five KDE Mapper vertices.
# `.nK` keeps the one-element list slice for vertex K.
m_kde_adult_5.60.5.n1 <- m_kde_adult_5.60.5$points_in_vertex[1]
m_kde_adult_5.60.5.n2 <- m_kde_adult_5.60.5$points_in_vertex[2]
m_kde_adult_5.60.5.n3 <- m_kde_adult_5.60.5$points_in_vertex[3]
m_kde_adult_5.60.5.n4 <- m_kde_adult_5.60.5$points_in_vertex[4]
m_kde_adult_5.60.5.n5 <- m_kde_adult_5.60.5$points_in_vertex[5]
# `.nK.vec` is the same content flattened to a plain, unnamed index vector.
m_kde_adult_5.60.5.n1.vec <- unlist(m_kde_adult_5.60.5.n1, use.names = FALSE)
m_kde_adult_5.60.5.n2.vec <- unlist(m_kde_adult_5.60.5.n2, use.names = FALSE)
m_kde_adult_5.60.5.n3.vec <- unlist(m_kde_adult_5.60.5.n3, use.names = FALSE)
m_kde_adult_5.60.5.n4.vec <- unlist(m_kde_adult_5.60.5.n4, use.names = FALSE)
m_kde_adult_5.60.5.n5.vec <- unlist(m_kde_adult_5.60.5.n5, use.names = FALSE)
## Use each vertex's indices to pull the matching rows of adult.one_hot_df4
tda.m_kde_adult_5.60.5.n1.vec <- adult.one_hot_df4[m_kde_adult_5.60.5.n1.vec, ]
tda.m_kde_adult_5.60.5.n2.vec <- adult.one_hot_df4[m_kde_adult_5.60.5.n2.vec, ]
tda.m_kde_adult_5.60.5.n3.vec <- adult.one_hot_df4[m_kde_adult_5.60.5.n3.vec, ]
tda.m_kde_adult_5.60.5.n4.vec <- adult.one_hot_df4[m_kde_adult_5.60.5.n4.vec, ]
tda.m_kde_adult_5.60.5.n5.vec <- adult.one_hot_df4[m_kde_adult_5.60.5.n5.vec, ]
library(caret)
# Rebuild the modelling frames: column 15 of `adult` is appended to the
# one-hot encoded data, then column subsets are taken by position.
adult_df1 <- adult[, 15]
adult.one_hot_df1 <- cbind(adult.one_hot_df, adult_df1)
adult.one_hot_df2 <- adult.one_hot_df1[, c(1, 11, 28, 64:66, 109)]
adult.one_hot_df3 <- adult.one_hot_df1[, c(1, 11, 28, 62:66)]
# Everything except columns 109 and 110
adult.one_hot_df4 <- adult.one_hot_df1[, -(109:110)]
# Draw one partition holding 70% of the rows, based on the adult_df1 column
trainIndex <- createDataPartition(
  adult.one_hot_df4$adult_df1,
  p = 0.7,
  list = FALSE,
  times = 1
)
head(trainIndex)
## Resample1
## [1,] 1
## [2,] 2
## [3,] 4
## [4,] 5
## [5,] 7
## [6,] 8
# Selected rows form the training set; all remaining rows form the test set
adult.one_hot_df4Train <- adult.one_hot_df4[trainIndex, ]
adult.one_hot_df4Test <- adult.one_hot_df4[-trainIndex, ]
# Resampling scheme used for model tuning: 3-fold cross-validation
fitControl <- trainControl(method = "cv", number = 3)
# Non-TDA-assisted baseline: random forest
# mtry cannot exceed the number of predictors, so the grid is spread over
# 2..p. The earlier grid, (1:20) * 50, ran from 50 to 1000; randomForest()
# reset every out-of-range value ("invalid mtry: reset to within valid
# range"), so most grid rows re-fitted the same model.
n_predictors <- ncol(adult.one_hot_df4Train) - 1 # every column but adult_df1
rfGrid <- expand.grid(
  mtry = unique(round(seq(2, n_predictors, length.out = 10)))
)
# `importance = TRUE` is forwarded to randomForest(). The earlier spelling,
# `Importance = T`, matched no argument and was silently ignored, so no
# permutation importance was computed.
adultRfFit <- train(
  as.factor(adult_df1) ~ .,
  data = adult.one_hot_df4Train,
  method = "rf",
  importance = TRUE,
  trControl = fitControl,
  tuneGrid = rfGrid,
  preProcess = c("center", "scale"),
  metric = "Accuracy"
)
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
# Show the tuning results of the baseline random forest
print(adultRfFit)
## Random Forest
##
## 22793 samples
## 108 predictor
## 2 classes: ' <=50K', ' >50K'
##
## Pre-processing: centered (108), scaled (108)
## Resampling: Cross-Validated (3 fold)
## Summary of sample sizes: 15196, 15195, 15195
## Resampling results across tuning parameters:
##
## mtry Accuracy Kappa
## 50 0.8572372 0.5882934
## 100 0.8546924 0.5828444
## 150 0.8549557 0.5839639
## 200 0.8541660 0.5813642
## 250 0.8546925 0.5827775
## 300 0.8542976 0.5810137
## 350 0.8535957 0.5792854
## 400 0.8531569 0.5788817
## 450 0.8530691 0.5783394
## 500 0.8532884 0.5784096
## 550 0.8538589 0.5809612
## 600 0.8533324 0.5787809
## 650 0.8542099 0.5817356
## 700 0.8543415 0.5823679
## 750 0.8538588 0.5805136
## 800 0.8528498 0.5773478
## 850 0.8537712 0.5804901
## 900 0.8536833 0.5798539
## 950 0.8535079 0.5788895
## 1000 0.8543854 0.5818180
##
## Accuracy was used to select the optimal model using the largest value.
## The final value used for the model was mtry = 50.
# Per-fold performance of the selected model
adultRfFit$resample
## Accuracy Kappa Resample
## 1 0.8636304 0.6053166 Fold1
## 2 0.8575941 0.5924862 Fold3
## 3 0.8504870 0.5670776 Fold2
# Keep only the per-fold accuracy (first column) as a one-column data frame
ad_rf_fit_re <- adultRfFit$resample["Accuracy"]
summary(adultRfFit)
## Length Class Mode
## call 5 -none- call
## type 1 -none- character
## predicted 22793 factor numeric
## err.rate 1500 -none- numeric
## confusion 6 -none- numeric
## votes 45586 matrix numeric
## oob.times 22793 -none- numeric
## classes 2 -none- character
## importance 108 -none- numeric
## importanceSD 0 -none- NULL
## localImportance 0 -none- NULL
## proximity 0 -none- NULL
## ntree 1 -none- numeric
## mtry 1 -none- numeric
## forest 14 -none- list
## y 22793 factor numeric
## test 0 -none- NULL
## inbag 0 -none- NULL
## xNames 108 -none- character
## problemType 1 -none- character
## tuneValue 1 data.frame list
## obsLevels 2 -none- character
## param 1 -none- list
# Variable-importance plot for the baseline random forest
vip(adultRfFit, 25) + ggtitle("non-TDA-Assisted: RF")

# Score the held-out rows with the tuned model
predictions <- predict(adultRfFit, newdata = adult.one_hot_df4Test)
# Cross-tabulate predictions against the observed outcome and print the result
rf_cf <- confusionMatrix(
  data = predictions,
  reference = as.factor(adult.one_hot_df4Test$adult_df1)
)
rf_cf
## Confusion Matrix and Statistics
##
## Reference
## Prediction <=50K >50K
## <=50K 6929 891
## >50K 487 1461
##
## Accuracy : 0.8589
## 95% CI : (0.8519, 0.8658)
## No Information Rate : 0.7592
## P-Value [Acc > NIR] : < 2.2e-16
##
## Kappa : 0.5901
##
## Mcnemar's Test P-Value : < 2.2e-16
##
## Sensitivity : 0.9343
## Specificity : 0.6212
## Pos Pred Value : 0.8861
## Neg Pred Value : 0.7500
## Prevalence : 0.7592
## Detection Rate : 0.7094
## Detection Prevalence : 0.8006
## Balanced Accuracy : 0.7778
##
## 'Positive' Class : <=50K
##
# Overall statistics of the test-set confusion matrix
rf_cf$overall
## Accuracy Kappa AccuracyLower AccuracyUpper AccuracyNull
## 8.589271e-01 5.901123e-01 8.518658e-01 8.657737e-01 7.592138e-01
## AccuracyPValue McnemarPValue
## 4.976785e-132 1.862103e-27
# Accuracy is the first element; select it by name
rf_cf_ov_acc <- rf_cf$overall["Accuracy"]
# Per-class statistics
rf_cf$byClass
## Sensitivity Specificity Pos Pred Value
## 0.9343312 0.6211735 0.8860614
## Neg Pred Value Precision Recall
## 0.7500000 0.8860614 0.9343312
## F1 Prevalence Detection Rate
## 0.9095563 0.7592138 0.7093571
## Detection Prevalence Balanced Accuracy
## 0.8005733 0.7777523
# Elements 5 to 7 are Precision, Recall and F1; select them by name
rf_cf_pre_rec_f1 <- rf_cf$byClass[c("Precision", "Recall", "F1")]
## With TDA PCA filter: 5 intervals, 60% overlap, 5 bins
## Node 1
# Random forest fitted on the rows belonging to vertex 1 of the PCA Mapper,
# with the same resampling control and tuning grid as the other fits.
# `importance = TRUE` is forwarded to randomForest(). The earlier spelling,
# `Importance = T`, matched no argument and was silently ignored.
Adult_TDA_PC_5.60.5_n1_RfFit0 <- train(
  as.factor(adult_df1) ~ .,
  data = tda.m_adult_5.60.5.n1.vec,
  method = "rf",
  importance = TRUE,
  trControl = fitControl,
  tuneGrid = rfGrid,
  preProcess = c("center", "scale"),
  metric = "Accuracy"
)
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V2.Without.pay,
## V4.Preschool, V7.Armed.Forces, V7.Priv.house.serv, V14.Dominican.Republic,
## V14.Guatemala, V14.Haiti, V14.Holand.Netherlands, V14.Honduras, V14.Laos,
## V14.Outlying.US.Guam.USVI.etc., V14.Thailand, V14.Trinadad.Tobago, V14.Vietnam
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V2.Without.pay,
## V4.Preschool, V7.Armed.Forces, V7.Priv.house.serv, V14.Dominican.Republic,
## V14.Guatemala, V14.Haiti, V14.Holand.Netherlands, V14.Honduras, V14.Laos,
## V14.Outlying.US.Guam.USVI.etc., V14.Thailand, V14.Trinadad.Tobago, V14.Vietnam
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V2.Without.pay,
## V4.Preschool, V7.Armed.Forces, V7.Priv.house.serv, V14.Dominican.Republic,
## V14.Guatemala, V14.Haiti, V14.Holand.Netherlands, V14.Honduras, V14.Laos,
## V14.Outlying.US.Guam.USVI.etc., V14.Thailand, V14.Trinadad.Tobago, V14.Vietnam
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V2.Without.pay,
## V4.Preschool, V7.Armed.Forces, V7.Priv.house.serv, V14.Dominican.Republic,
## V14.Guatemala, V14.Haiti, V14.Holand.Netherlands, V14.Honduras, V14.Laos,
## V14.Outlying.US.Guam.USVI.etc., V14.Thailand, V14.Trinadad.Tobago, V14.Vietnam
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V2.Without.pay,
## V4.Preschool, V7.Armed.Forces, V7.Priv.house.serv, V14.Dominican.Republic,
## V14.Guatemala, V14.Haiti, V14.Holand.Netherlands, V14.Honduras, V14.Laos,
## V14.Outlying.US.Guam.USVI.etc., V14.Thailand, V14.Trinadad.Tobago, V14.Vietnam
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V2.Without.pay,
## V4.Preschool, V7.Armed.Forces, V7.Priv.house.serv, V14.Dominican.Republic,
## V14.Guatemala, V14.Haiti, V14.Holand.Netherlands, V14.Honduras, V14.Laos,
## V14.Outlying.US.Guam.USVI.etc., V14.Thailand, V14.Trinadad.Tobago, V14.Vietnam
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V2.Without.pay,
## V4.Preschool, V7.Armed.Forces, V7.Priv.house.serv, V14.Dominican.Republic,
## V14.Guatemala, V14.Haiti, V14.Holand.Netherlands, V14.Honduras, V14.Laos,
## V14.Outlying.US.Guam.USVI.etc., V14.Thailand, V14.Trinadad.Tobago, V14.Vietnam
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V2.Without.pay,
## V4.Preschool, V7.Armed.Forces, V7.Priv.house.serv, V14.Dominican.Republic,
## V14.Guatemala, V14.Haiti, V14.Holand.Netherlands, V14.Honduras, V14.Laos,
## V14.Outlying.US.Guam.USVI.etc., V14.Thailand, V14.Trinadad.Tobago, V14.Vietnam
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V2.Without.pay,
## V4.Preschool, V7.Armed.Forces, V7.Priv.house.serv, V14.Dominican.Republic,
## V14.Guatemala, V14.Haiti, V14.Holand.Netherlands, V14.Honduras, V14.Laos,
## V14.Outlying.US.Guam.USVI.etc., V14.Thailand, V14.Trinadad.Tobago, V14.Vietnam
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V2.Without.pay,
## V4.Preschool, V7.Armed.Forces, V7.Priv.house.serv, V14.Dominican.Republic,
## V14.Guatemala, V14.Haiti, V14.Holand.Netherlands, V14.Honduras, V14.Laos,
## V14.Outlying.US.Guam.USVI.etc., V14.Thailand, V14.Trinadad.Tobago, V14.Vietnam
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V2.Without.pay,
## V4.Preschool, V7.Armed.Forces, V7.Priv.house.serv, V14.Dominican.Republic,
## V14.Guatemala, V14.Haiti, V14.Holand.Netherlands, V14.Honduras, V14.Laos,
## V14.Outlying.US.Guam.USVI.etc., V14.Thailand, V14.Trinadad.Tobago, V14.Vietnam
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V2.Without.pay,
## V4.Preschool, V7.Armed.Forces, V7.Priv.house.serv, V14.Dominican.Republic,
## V14.Guatemala, V14.Haiti, V14.Holand.Netherlands, V14.Honduras, V14.Laos,
## V14.Outlying.US.Guam.USVI.etc., V14.Thailand, V14.Trinadad.Tobago, V14.Vietnam
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V2.Without.pay,
## V4.Preschool, V7.Armed.Forces, V7.Priv.house.serv, V14.Dominican.Republic,
## V14.Guatemala, V14.Haiti, V14.Holand.Netherlands, V14.Honduras, V14.Laos,
## V14.Outlying.US.Guam.USVI.etc., V14.Thailand, V14.Trinadad.Tobago, V14.Vietnam
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V2.Without.pay,
## V4.Preschool, V7.Armed.Forces, V7.Priv.house.serv, V14.Dominican.Republic,
## V14.Guatemala, V14.Haiti, V14.Holand.Netherlands, V14.Honduras, V14.Laos,
## V14.Outlying.US.Guam.USVI.etc., V14.Thailand, V14.Trinadad.Tobago, V14.Vietnam
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V2.Without.pay,
## V4.Preschool, V7.Armed.Forces, V7.Priv.house.serv, V14.Dominican.Republic,
## V14.Guatemala, V14.Haiti, V14.Holand.Netherlands, V14.Honduras, V14.Laos,
## V14.Outlying.US.Guam.USVI.etc., V14.Thailand, V14.Trinadad.Tobago, V14.Vietnam
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V2.Without.pay,
## V4.Preschool, V7.Armed.Forces, V7.Priv.house.serv, V14.Dominican.Republic,
## V14.Guatemala, V14.Haiti, V14.Holand.Netherlands, V14.Honduras, V14.Laos,
## V14.Outlying.US.Guam.USVI.etc., V14.Thailand, V14.Trinadad.Tobago, V14.Vietnam
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V2.Without.pay,
## V4.Preschool, V7.Armed.Forces, V7.Priv.house.serv, V14.Dominican.Republic,
## V14.Guatemala, V14.Haiti, V14.Holand.Netherlands, V14.Honduras, V14.Laos,
## V14.Outlying.US.Guam.USVI.etc., V14.Thailand, V14.Trinadad.Tobago, V14.Vietnam
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V2.Without.pay,
## V4.Preschool, V7.Armed.Forces, V7.Priv.house.serv, V14.Dominican.Republic,
## V14.Guatemala, V14.Haiti, V14.Holand.Netherlands, V14.Honduras, V14.Laos,
## V14.Outlying.US.Guam.USVI.etc., V14.Thailand, V14.Trinadad.Tobago, V14.Vietnam
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V2.Without.pay,
## V4.Preschool, V7.Armed.Forces, V7.Priv.house.serv, V14.Dominican.Republic,
## V14.Guatemala, V14.Haiti, V14.Holand.Netherlands, V14.Honduras, V14.Laos,
## V14.Outlying.US.Guam.USVI.etc., V14.Thailand, V14.Trinadad.Tobago, V14.Vietnam
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V2.Without.pay,
## V4.Preschool, V7.Armed.Forces, V7.Priv.house.serv, V14.Dominican.Republic,
## V14.Guatemala, V14.Haiti, V14.Holand.Netherlands, V14.Honduras, V14.Laos,
## V14.Outlying.US.Guam.USVI.etc., V14.Thailand, V14.Trinadad.Tobago, V14.Vietnam
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V2.Without.pay,
## V4.Preschool, V7.Priv.house.serv, V14.Dominican.Republic, V14.Haiti,
## V14.Holand.Netherlands, V14.Nicaragua, V14.Outlying.US.Guam.USVI.etc.,
## V14.Trinadad.Tobago
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V2.Without.pay,
## V4.Preschool, V7.Priv.house.serv, V14.Dominican.Republic, V14.Haiti,
## V14.Holand.Netherlands, V14.Nicaragua, V14.Outlying.US.Guam.USVI.etc.,
## V14.Trinadad.Tobago
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V2.Without.pay,
## V4.Preschool, V7.Priv.house.serv, V14.Dominican.Republic, V14.Haiti,
## V14.Holand.Netherlands, V14.Nicaragua, V14.Outlying.US.Guam.USVI.etc.,
## V14.Trinadad.Tobago
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V2.Without.pay,
## V4.Preschool, V7.Priv.house.serv, V14.Dominican.Republic, V14.Haiti,
## V14.Holand.Netherlands, V14.Nicaragua, V14.Outlying.US.Guam.USVI.etc.,
## V14.Trinadad.Tobago
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V2.Without.pay,
## V4.Preschool, V7.Priv.house.serv, V14.Dominican.Republic, V14.Haiti,
## V14.Holand.Netherlands, V14.Nicaragua, V14.Outlying.US.Guam.USVI.etc.,
## V14.Trinadad.Tobago
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V2.Without.pay,
## V4.Preschool, V7.Priv.house.serv, V14.Dominican.Republic, V14.Haiti,
## V14.Holand.Netherlands, V14.Nicaragua, V14.Outlying.US.Guam.USVI.etc.,
## V14.Trinadad.Tobago
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V2.Without.pay,
## V4.Preschool, V7.Priv.house.serv, V14.Dominican.Republic, V14.Haiti,
## V14.Holand.Netherlands, V14.Nicaragua, V14.Outlying.US.Guam.USVI.etc.,
## V14.Trinadad.Tobago
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V2.Without.pay,
## V4.Preschool, V7.Priv.house.serv, V14.Dominican.Republic, V14.Haiti,
## V14.Holand.Netherlands, V14.Nicaragua, V14.Outlying.US.Guam.USVI.etc.,
## V14.Trinadad.Tobago
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V2.Without.pay,
## V4.Preschool, V7.Priv.house.serv, V14.Dominican.Republic, V14.Haiti,
## V14.Holand.Netherlands, V14.Nicaragua, V14.Outlying.US.Guam.USVI.etc.,
## V14.Trinadad.Tobago
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V2.Without.pay,
## V4.Preschool, V7.Priv.house.serv, V14.Dominican.Republic, V14.Haiti,
## V14.Holand.Netherlands, V14.Nicaragua, V14.Outlying.US.Guam.USVI.etc.,
## V14.Trinadad.Tobago
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V2.Without.pay,
## V4.Preschool, V7.Priv.house.serv, V14.Dominican.Republic, V14.Haiti,
## V14.Holand.Netherlands, V14.Nicaragua, V14.Outlying.US.Guam.USVI.etc.,
## V14.Trinadad.Tobago
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V2.Without.pay,
## V4.Preschool, V7.Priv.house.serv, V14.Dominican.Republic, V14.Haiti,
## V14.Holand.Netherlands, V14.Nicaragua, V14.Outlying.US.Guam.USVI.etc.,
## V14.Trinadad.Tobago
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V2.Without.pay,
## V4.Preschool, V7.Priv.house.serv, V14.Dominican.Republic, V14.Haiti,
## V14.Holand.Netherlands, V14.Nicaragua, V14.Outlying.US.Guam.USVI.etc.,
## V14.Trinadad.Tobago
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V2.Without.pay,
## V4.Preschool, V7.Priv.house.serv, V14.Dominican.Republic, V14.Haiti,
## V14.Holand.Netherlands, V14.Nicaragua, V14.Outlying.US.Guam.USVI.etc.,
## V14.Trinadad.Tobago
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V2.Without.pay,
## V4.Preschool, V7.Priv.house.serv, V14.Dominican.Republic, V14.Haiti,
## V14.Holand.Netherlands, V14.Nicaragua, V14.Outlying.US.Guam.USVI.etc.,
## V14.Trinadad.Tobago
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V2.Without.pay,
## V4.Preschool, V7.Priv.house.serv, V14.Dominican.Republic, V14.Haiti,
## V14.Holand.Netherlands, V14.Nicaragua, V14.Outlying.US.Guam.USVI.etc.,
## V14.Trinadad.Tobago
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V2.Without.pay,
## V4.Preschool, V7.Priv.house.serv, V14.Dominican.Republic, V14.Haiti,
## V14.Holand.Netherlands, V14.Nicaragua, V14.Outlying.US.Guam.USVI.etc.,
## V14.Trinadad.Tobago
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V2.Without.pay,
## V4.Preschool, V7.Priv.house.serv, V14.Dominican.Republic, V14.Haiti,
## V14.Holand.Netherlands, V14.Nicaragua, V14.Outlying.US.Guam.USVI.etc.,
## V14.Trinadad.Tobago
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V2.Without.pay,
## V4.Preschool, V7.Priv.house.serv, V14.Dominican.Republic, V14.Haiti,
## V14.Holand.Netherlands, V14.Nicaragua, V14.Outlying.US.Guam.USVI.etc.,
## V14.Trinadad.Tobago
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V2.Without.pay,
## V4.Preschool, V7.Priv.house.serv, V14.Dominican.Republic, V14.Haiti,
## V14.Holand.Netherlands, V14.Nicaragua, V14.Outlying.US.Guam.USVI.etc.,
## V14.Trinadad.Tobago
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V2.Without.pay,
## V4.Preschool, V7.Priv.house.serv, V8.Own.child, V14.Dominican.Republic,
## V14.Haiti, V14.Holand.Netherlands, V14.Outlying.US.Guam.USVI.etc.,
## V14.Trinadad.Tobago
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V2.Without.pay,
## V4.Preschool, V7.Priv.house.serv, V8.Own.child, V14.Dominican.Republic,
## V14.Haiti, V14.Holand.Netherlands, V14.Outlying.US.Guam.USVI.etc.,
## V14.Trinadad.Tobago
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V2.Without.pay,
## V4.Preschool, V7.Priv.house.serv, V8.Own.child, V14.Dominican.Republic,
## V14.Haiti, V14.Holand.Netherlands, V14.Outlying.US.Guam.USVI.etc.,
## V14.Trinadad.Tobago
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V2.Without.pay,
## V4.Preschool, V7.Priv.house.serv, V8.Own.child, V14.Dominican.Republic,
## V14.Haiti, V14.Holand.Netherlands, V14.Outlying.US.Guam.USVI.etc.,
## V14.Trinadad.Tobago
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V2.Without.pay,
## V4.Preschool, V7.Priv.house.serv, V8.Own.child, V14.Dominican.Republic,
## V14.Haiti, V14.Holand.Netherlands, V14.Outlying.US.Guam.USVI.etc.,
## V14.Trinadad.Tobago
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V2.Without.pay,
## V4.Preschool, V7.Priv.house.serv, V8.Own.child, V14.Dominican.Republic,
## V14.Haiti, V14.Holand.Netherlands, V14.Outlying.US.Guam.USVI.etc.,
## V14.Trinadad.Tobago
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V2.Without.pay,
## V4.Preschool, V7.Priv.house.serv, V8.Own.child, V14.Dominican.Republic,
## V14.Haiti, V14.Holand.Netherlands, V14.Outlying.US.Guam.USVI.etc.,
## V14.Trinadad.Tobago
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V2.Without.pay,
## V4.Preschool, V7.Priv.house.serv, V8.Own.child, V14.Dominican.Republic,
## V14.Haiti, V14.Holand.Netherlands, V14.Outlying.US.Guam.USVI.etc.,
## V14.Trinadad.Tobago
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V2.Without.pay,
## V4.Preschool, V7.Priv.house.serv, V8.Own.child, V14.Dominican.Republic,
## V14.Haiti, V14.Holand.Netherlands, V14.Outlying.US.Guam.USVI.etc.,
## V14.Trinadad.Tobago
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V2.Without.pay,
## V4.Preschool, V7.Priv.house.serv, V8.Own.child, V14.Dominican.Republic,
## V14.Haiti, V14.Holand.Netherlands, V14.Outlying.US.Guam.USVI.etc.,
## V14.Trinadad.Tobago
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V2.Without.pay,
## V4.Preschool, V7.Priv.house.serv, V8.Own.child, V14.Dominican.Republic,
## V14.Haiti, V14.Holand.Netherlands, V14.Outlying.US.Guam.USVI.etc.,
## V14.Trinadad.Tobago
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V2.Without.pay,
## V4.Preschool, V7.Priv.house.serv, V8.Own.child, V14.Dominican.Republic,
## V14.Haiti, V14.Holand.Netherlands, V14.Outlying.US.Guam.USVI.etc.,
## V14.Trinadad.Tobago
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V2.Without.pay,
## V4.Preschool, V7.Priv.house.serv, V8.Own.child, V14.Dominican.Republic,
## V14.Haiti, V14.Holand.Netherlands, V14.Outlying.US.Guam.USVI.etc.,
## V14.Trinadad.Tobago
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V2.Without.pay,
## V4.Preschool, V7.Priv.house.serv, V8.Own.child, V14.Dominican.Republic,
## V14.Haiti, V14.Holand.Netherlands, V14.Outlying.US.Guam.USVI.etc.,
## V14.Trinadad.Tobago
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V2.Without.pay,
## V4.Preschool, V7.Priv.house.serv, V8.Own.child, V14.Dominican.Republic,
## V14.Haiti, V14.Holand.Netherlands, V14.Outlying.US.Guam.USVI.etc.,
## V14.Trinadad.Tobago
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V2.Without.pay,
## V4.Preschool, V7.Priv.house.serv, V8.Own.child, V14.Dominican.Republic,
## V14.Haiti, V14.Holand.Netherlands, V14.Outlying.US.Guam.USVI.etc.,
## V14.Trinadad.Tobago
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V2.Without.pay,
## V4.Preschool, V7.Priv.house.serv, V8.Own.child, V14.Dominican.Republic,
## V14.Haiti, V14.Holand.Netherlands, V14.Outlying.US.Guam.USVI.etc.,
## V14.Trinadad.Tobago
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V2.Without.pay,
## V4.Preschool, V7.Priv.house.serv, V8.Own.child, V14.Dominican.Republic,
## V14.Haiti, V14.Holand.Netherlands, V14.Outlying.US.Guam.USVI.etc.,
## V14.Trinadad.Tobago
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V2.Without.pay,
## V4.Preschool, V7.Priv.house.serv, V8.Own.child, V14.Dominican.Republic,
## V14.Haiti, V14.Holand.Netherlands, V14.Outlying.US.Guam.USVI.etc.,
## V14.Trinadad.Tobago
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V2.Without.pay,
## V4.Preschool, V7.Priv.house.serv, V8.Own.child, V14.Dominican.Republic,
## V14.Haiti, V14.Holand.Netherlands, V14.Outlying.US.Guam.USVI.etc.,
## V14.Trinadad.Tobago
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V2.Without.pay,
## V4.Preschool, V7.Priv.house.serv, V14.Dominican.Republic, V14.Haiti,
## V14.Holand.Netherlands, V14.Outlying.US.Guam.USVI.etc., V14.Trinadad.Tobago
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
# Display the fitted caret model: resampling scheme, accuracy/kappa for each
# candidate mtry, and the mtry that was finally selected.
Adult_TDA_PC_5.60.5_n1_RfFit0
## Random Forest
##
## 6560 samples
## 108 predictor
## 2 classes: ' <=50K', ' >50K'
##
## Pre-processing: centered (108), scaled (108)
## Resampling: Cross-Validated (3 fold)
## Summary of sample sizes: 4373, 4373, 4374
## Resampling results across tuning parameters:
##
## mtry Accuracy Kappa
## 50 0.8972570 0.3024224
## 100 0.8967994 0.3162878
## 150 0.8981714 0.3326416
## 200 0.8981715 0.3262756
## 250 0.8972567 0.3187978
## 300 0.8987809 0.3319605
## 350 0.8990860 0.3353872
## 400 0.8992384 0.3345968
## 450 0.8971043 0.3250847
## 500 0.8992385 0.3320094
## 550 0.8977139 0.3245236
## 600 0.8984765 0.3302399
## 650 0.8981712 0.3289740
## 700 0.8983237 0.3306323
## 750 0.8971040 0.3238449
## 800 0.8977141 0.3227463
## 850 0.8969515 0.3251635
## 900 0.8981714 0.3270742
## 950 0.8981711 0.3325710
## 1000 0.8977139 0.3231951
##
## Accuracy was used to select the optimal model using the largest value.
## The final value used for the model was mtry = 500.
# Per-fold accuracy and kappa recorded for the cross-validation resamples.
Adult_TDA_PC_5.60.5_n1_RfFit0$resample
## Accuracy Kappa Resample
## 1 0.8930041 0.3253968 Fold1
## 2 0.9034767 0.3422705 Fold3
## 3 0.9012346 0.3283609 Fold2
# Keep the per-fold accuracy as a one-column data frame; it is subtracted
# fold-by-fold from the comparison model's accuracies further down.
# NOTE(review): the resample rows are listed as Fold1, Fold3, Fold2, and the
# later subtraction pairs rows by position -- confirm the other model's
# resamples are in the same order.
ad_tda_pc_5.60.5_n1_rf_fit0_re <-
  Adult_TDA_PC_5.60.5_n1_RfFit0$resample["Accuracy"]
# List the components stored in the fitted model.
summary(Adult_TDA_PC_5.60.5_n1_RfFit0)
## Length Class Mode
## call 5 -none- call
## type 1 -none- character
## predicted 6560 factor numeric
## err.rate 1500 -none- numeric
## confusion 6 -none- numeric
## votes 13120 matrix numeric
## oob.times 6560 -none- numeric
## classes 2 -none- character
## importance 108 -none- numeric
## importanceSD 0 -none- NULL
## localImportance 0 -none- NULL
## proximity 0 -none- NULL
## ntree 1 -none- numeric
## mtry 1 -none- numeric
## forest 14 -none- list
## y 6560 factor numeric
## test 0 -none- NULL
## inbag 0 -none- NULL
## xNames 108 -none- character
## problemType 1 -none- character
## tuneValue 1 data.frame list
## obsLevels 2 -none- character
## param 1 -none- list
# Variable-importance plot for the n1 TDA-assisted random forest, limited to
# the 25 highest-ranked predictors. The count is passed by name so it cannot be
# mistaken for another argument, and the title typo ("Assited") is corrected.
vip(Adult_TDA_PC_5.60.5_n1_RfFit0, num_features = 25) +
  ggtitle("Adult_TDA_PCA_5.60.5_n1_RfFit TDA-Assisted RF")

# Score the test data with the n1 model that was fitted on the training data.
pred0 <- predict(
  Adult_TDA_PC_5.60.5_n1_RfFit0,
  newdata = adult.one_hot_df4Test
)
# Cross-tabulate predicted classes against the observed test labels to assess
# how the model performs on data it was not fitted to.
ad_tda_pc_5.60.5_n1_rf_cf0 <- confusionMatrix(
  data = pred0,
  reference = as.factor(adult.one_hot_df4Test$adult_df1)
)
ad_tda_pc_5.60.5_n1_rf_cf0
## Confusion Matrix and Statistics
##
## Reference
## Prediction <=50K >50K
## <=50K 331 8
## >50K 7085 2344
##
## Accuracy : 0.2739
## 95% CI : (0.265, 0.2828)
## No Information Rate : 0.7592
## P-Value [Acc > NIR] : 1
##
## Kappa : 0.0203
##
## Mcnemar's Test P-Value : <2e-16
##
## Sensitivity : 0.04463
## Specificity : 0.99660
## Pos Pred Value : 0.97640
## Neg Pred Value : 0.24859
## Prevalence : 0.75921
## Detection Rate : 0.03389
## Detection Prevalence : 0.03471
## Balanced Accuracy : 0.52062
##
## 'Positive' Class : <=50K
##
# Print the same confusion-matrix object a second time (repeats the display
# produced immediately after it was created).
ad_tda_pc_5.60.5_n1_rf_cf0
## Confusion Matrix and Statistics
##
## Reference
## Prediction <=50K >50K
## <=50K 331 8
## >50K 7085 2344
##
## Accuracy : 0.2739
## 95% CI : (0.265, 0.2828)
## No Information Rate : 0.7592
## P-Value [Acc > NIR] : 1
##
## Kappa : 0.0203
##
## Mcnemar's Test P-Value : <2e-16
##
## Sensitivity : 0.04463
## Specificity : 0.99660
## Pos Pred Value : 0.97640
## Neg Pred Value : 0.24859
## Prevalence : 0.75921
## Detection Rate : 0.03389
## Detection Prevalence : 0.03471
## Balanced Accuracy : 0.52062
##
## 'Positive' Class : <=50K
##
# Overall test-set statistics (accuracy, kappa, accuracy interval, p-values).
ad_tda_pc_5.60.5_n1_rf_cf0$overall
## Accuracy Kappa AccuracyLower AccuracyUpper AccuracyNull
## 0.27385340 0.02033811 0.26502734 0.28281448 0.75921376
## AccuracyPValue McnemarPValue
## 1.00000000 0.00000000
# Keep the overall test-set accuracy (the first element of `overall`),
# selected by its name rather than by position.
ad_tda_pc_5.60.5_n1_rf_cf0_ov_acc <-
  ad_tda_pc_5.60.5_n1_rf_cf0$overall["Accuracy"]
# Per-class statistics reported for the positive class.
ad_tda_pc_5.60.5_n1_rf_cf0$byClass
## Sensitivity Specificity Pos Pred Value
## 0.04463323 0.99659864 0.97640118
## Neg Pred Value Precision Recall
## 0.24859476 0.97640118 0.04463323
## F1 Prevalence Detection Rate
## 0.08536428 0.75921376 0.03388616
## Detection Prevalence Balanced Accuracy
## 0.03470516 0.52061593
# Keep precision, recall and F1 (elements 5 to 7 of `byClass`), selected by
# name rather than by position.
ad_tda_pc_5.60.5_n1_rf_cf0_pre_rec_f1 <-
  ad_tda_pc_5.60.5_n1_rf_cf0$byClass[c("Precision", "Recall", "F1")]
###### Conduct initial Bayesian tests of non-TDA-assisted RF vs. TDA-assisted RF classifiers
### 3-fold diff
# Fold-wise accuracy difference, paired by row position.
# NOTE(review): ad_rf_fit_re is presumably the per-fold accuracy of the
# non-TDA-assisted RF -- confirm against where it is created.
diff_tda_pca_5.60.5_rf_n1_3_fold <-
  ad_rf_fit_re - ad_tda_pc_5.60.5_n1_rf_fit0_re
diff_tda_pca_5.60.5_rf_n1_3_fold
## Accuracy
## 1 -0.02937373
## 2 -0.04588257
## 3 -0.05074760
## Bayesian tests on the 3-fold differences
# Bayesian sign test; -0.01 and 0.01 are passed as the bounds of the region of
# practical equivalence (the same bounds are used for the rope plot).
bst_tda_pca_5.60.5_rf.n1_3_fold <- BayesianSignTest(
  as.matrix(diff_tda_pca_5.60.5_rf_n1_3_fold),
  -0.01,
  0.01
)
bst_tda_pca_5.60.5_rf.n1_3_fold
## $probLeft
## [1] 0.75
##
## $probRope
## [1] 0.25
##
## $probRight
## [1] 0
# Odds "left" from the Bayesian sign test: probLeft divided by probRight.
# The ratio evaluates to Inf when probRight is 0.
bst_tda_pca_5.60.5_rf.n1_3_fold_odds.left <- with(
  bst_tda_pca_5.60.5_rf.n1_3_fold,
  probLeft / probRight
)
bst_tda_pca_5.60.5_rf.n1_3_fold_odds.left
## [1] Inf
# Bayesian signed-rank test on the same fold-wise differences, with the same
# -0.01 / 0.01 bounds.
bsr_tda_pca_5.60.5_rf.n1_3_fold <- BayesianSignedRank(
  as.matrix(diff_tda_pca_5.60.5_rf_n1_3_fold),
  -0.01,
  0.01
)
bsr_tda_pca_5.60.5_rf.n1_3_fold
## $winLeft
## [1] 0.9915333
##
## $winRope
## [1] 0.008466667
##
## $winRight
## [1] 0
# Bayesian correlated t-test on the fold-wise accuracy differences.
# The second argument is the assumed correlation between cross-validation
# results. The usual heuristic is 1 / (number of folds); the models here were
# resampled with 3-fold cross-validation, so 1/3 replaces the previous 0.1
# (which corresponds to 10 folds). The last two arguments are the bounds of
# the region of practical equivalence.
# NOTE(review): assumes the helper takes (differences, rho, rope_min,
# rope_max) -- confirm against its definition. The recorded output that
# follows was produced with 0.1 and needs regenerating.
bct_tda_pca_5.60.5_rf.n1_3_fold <- correlatedBayesianTtest(
  as.matrix(diff_tda_pca_5.60.5_rf_n1_3_fold),
  1 / 3,
  -0.01,
  0.01
)
bct_tda_pca_5.60.5_rf.n1_3_fold
## $left
## [1] 0.9748058
##
## $rope
## [1] 0.01518858
##
## $right
## [1] 0.01000558
# Rope plot of the fold-wise differences against the [-0.01, 0.01] region.
plot(
  rope(
    as.matrix(diff_tda_pca_5.60.5_rf_n1_3_fold),
    range = c(-0.01, 0.01)
  )
)

# BayesFactor (left disabled)
#bf_tda_pca_5.60.5_rf.n1_3_fold = ttestBF(x = as.matrix(diff_tda_pca_5.60.5_rf_n1_3_fold))
#bf_tda_pca_5.60.5_rf.n1_3_fold
# One-sample t-test of the fold-wise differences against a mean of zero.
t.test(as.matrix(diff_tda_pca_5.60.5_rf_n1_3_fold))
##
## One Sample t-test
##
## data: as.matrix(diff_tda_pca_5.60.5_rf_n1_3_fold)
## t = -6.4936, df = 2, p-value = 0.0229
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
## -0.06983126 -0.01417134
## sample estimates:
## mean of x
## -0.0420013
### Test-set difference
# Difference in overall test-set accuracy; the second term is the n1
# TDA-assisted RF's accuracy taken from its confusion matrix.
diff_tda_pca_5.60.5_rf.n1_test <-
  rf_cf_ov_acc - ad_tda_pc_5.60.5_n1_rf_cf0_ov_acc
diff_tda_pca_5.60.5_rf.n1_test
## Accuracy
## 0.5850737
## Bayesian tests on the test-set difference
# Bayesian sign test with the same -0.01 / 0.01 bounds.
# NOTE(review): the test-set difference is a single value, so this test is
# run on one observation only.
bst_tda_pca_5.60.5_rf.n1_test <- BayesianSignTest(
  as.matrix(diff_tda_pca_5.60.5_rf.n1_test),
  -0.01,
  0.01
)
bst_tda_pca_5.60.5_rf.n1_test
## $probLeft
## [1] 0
##
## $probRope
## [1] 0.5
##
## $probRight
## [1] 0.5
# Odds "left" from the test-set Bayesian sign test: probLeft / probRight.
bst_tda_pca_5.60.5_rf.n1_test_odds.left <- with(
  bst_tda_pca_5.60.5_rf.n1_test,
  probLeft / probRight
)
bst_tda_pca_5.60.5_rf.n1_test_odds.left
## [1] 0
# Bayesian signed-rank test on the single test-set difference, with the same
# -0.01 / 0.01 bounds.
bsr_tda_pca_5.60.5_rf.n1_test <- BayesianSignedRank(
  as.matrix(diff_tda_pca_5.60.5_rf.n1_test),
  -0.01,
  0.01
)
bsr_tda_pca_5.60.5_rf.n1_test
## $winLeft
## [1] 0
##
## $winRope
## [1] 0.1560333
##
## $winRight
## [1] 0.8439667
# Bayesian correlated t-test on the test-set difference.
# NOTE(review): the input is a single value and the recorded result is NA for
# all three regions, so this call yields no usable estimate.
bct_tda_pca_5.60.5_rf.n1_test <- correlatedBayesianTtest(
  as.matrix(diff_tda_pca_5.60.5_rf.n1_test),
  0.1,
  -0.01,
  0.01
)
bct_tda_pca_5.60.5_rf.n1_test
## $left
## [1] NA
##
## $rope
## [1] NA
##
## $right
## [1] NA
# Rope Plot
#plot(rope(diff_tda_pca_5.60.5_rf.n1_test)))
#BayesFactor
#bf_tda_pca_5.60.5_rf.n1_test = ttestBF(x = as.matrix(diff_tda_pca_5.60.5_rf.n1_test)) #bf_tda_pca_5.60.5_rf.n1_test
#t_test
#t.test(as.matrix(diff_tda_pca_5.60.5_rf.n1_test))
##Node2
# Tune a random forest (caret method "rf") on the node-2 data set, using the
# resampling scheme in `fitControl` and the mtry grid in `rfGrid` (both defined
# elsewhere), with predictors centred and scaled. The model with the highest
# resampled Accuracy is kept.
#
# Fixes relative to the previous call:
#   * `Importance = T` was silently ignored: randomForest's argument is the
#     lower-case `importance`, and the mis-cased name was swallowed by `...`
#     (the echoed summary further down shows importanceSD as NULL). It is now
#     `importance = TRUE`, so permutation importance is computed; this makes
#     fitting slower and changes what the importance plot displays.
#   * `T` replaced by `TRUE`, which cannot be reassigned.
#   * `preProc` spelled out as `preProcess` instead of relying on partial
#     argument matching.
#
# NOTE(review): the echoed "invalid mtry: reset to within valid range" warnings,
# together with "108 predictor" in the printed fit, indicate that `rfGrid`
# requests mtry values above the number of predictors -- consider trimming it.
# NOTE: the echoed output below predates this change and must be regenerated.
Adult_TDA_PC_5.60.5_n2_RfFit0 <- train(
  as.factor(adult_df1) ~ .,
  data = tda.m_adult_5.60.5.n2.vec,
  importance = TRUE,
  method = "rf",
  trControl = fitControl,
  tuneGrid = rfGrid,
  preProcess = c("center", "scale"),
  metric = "Accuracy"
)
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V7.Priv.house.serv,
## V14.Holand.Netherlands, V14.Outlying.US.Guam.USVI.etc.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V7.Priv.house.serv,
## V14.Holand.Netherlands, V14.Outlying.US.Guam.USVI.etc.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V7.Priv.house.serv,
## V14.Holand.Netherlands, V14.Outlying.US.Guam.USVI.etc.
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V7.Priv.house.serv,
## V14.Holand.Netherlands, V14.Outlying.US.Guam.USVI.etc.
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V7.Priv.house.serv,
## V14.Holand.Netherlands, V14.Outlying.US.Guam.USVI.etc.
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V7.Priv.house.serv,
## V14.Holand.Netherlands, V14.Outlying.US.Guam.USVI.etc.
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V7.Priv.house.serv,
## V14.Holand.Netherlands, V14.Outlying.US.Guam.USVI.etc.
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V7.Priv.house.serv,
## V14.Holand.Netherlands, V14.Outlying.US.Guam.USVI.etc.
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V7.Priv.house.serv,
## V14.Holand.Netherlands, V14.Outlying.US.Guam.USVI.etc.
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V7.Priv.house.serv,
## V14.Holand.Netherlands, V14.Outlying.US.Guam.USVI.etc.
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V7.Priv.house.serv,
## V14.Holand.Netherlands, V14.Outlying.US.Guam.USVI.etc.
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V7.Priv.house.serv,
## V14.Holand.Netherlands, V14.Outlying.US.Guam.USVI.etc.
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V7.Priv.house.serv,
## V14.Holand.Netherlands, V14.Outlying.US.Guam.USVI.etc.
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V7.Priv.house.serv,
## V14.Holand.Netherlands, V14.Outlying.US.Guam.USVI.etc.
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V7.Priv.house.serv,
## V14.Holand.Netherlands, V14.Outlying.US.Guam.USVI.etc.
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V7.Priv.house.serv,
## V14.Holand.Netherlands, V14.Outlying.US.Guam.USVI.etc.
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V7.Priv.house.serv,
## V14.Holand.Netherlands, V14.Outlying.US.Guam.USVI.etc.
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V7.Priv.house.serv,
## V14.Holand.Netherlands, V14.Outlying.US.Guam.USVI.etc.
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V7.Priv.house.serv,
## V14.Holand.Netherlands, V14.Outlying.US.Guam.USVI.etc.
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V7.Priv.house.serv,
## V14.Holand.Netherlands, V14.Outlying.US.Guam.USVI.etc.
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V7.Armed.Forces,
## V14.Holand.Netherlands, V14.Honduras
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V7.Armed.Forces,
## V14.Holand.Netherlands, V14.Honduras
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V7.Armed.Forces,
## V14.Holand.Netherlands, V14.Honduras
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V7.Armed.Forces,
## V14.Holand.Netherlands, V14.Honduras
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V7.Armed.Forces,
## V14.Holand.Netherlands, V14.Honduras
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V7.Armed.Forces,
## V14.Holand.Netherlands, V14.Honduras
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V7.Armed.Forces,
## V14.Holand.Netherlands, V14.Honduras
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V7.Armed.Forces,
## V14.Holand.Netherlands, V14.Honduras
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V7.Armed.Forces,
## V14.Holand.Netherlands, V14.Honduras
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V7.Armed.Forces,
## V14.Holand.Netherlands, V14.Honduras
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V7.Armed.Forces,
## V14.Holand.Netherlands, V14.Honduras
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V7.Armed.Forces,
## V14.Holand.Netherlands, V14.Honduras
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V7.Armed.Forces,
## V14.Holand.Netherlands, V14.Honduras
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V7.Armed.Forces,
## V14.Holand.Netherlands, V14.Honduras
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V7.Armed.Forces,
## V14.Holand.Netherlands, V14.Honduras
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V7.Armed.Forces,
## V14.Holand.Netherlands, V14.Honduras
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V7.Armed.Forces,
## V14.Holand.Netherlands, V14.Honduras
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V7.Armed.Forces,
## V14.Holand.Netherlands, V14.Honduras
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V7.Armed.Forces,
## V14.Holand.Netherlands, V14.Honduras
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V7.Armed.Forces,
## V14.Holand.Netherlands, V14.Honduras
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands
# Print the tuning summary of the node-2 fit: sample/predictor counts,
# pre-processing, resampling scheme, and resampled Accuracy/Kappa per mtry.
Adult_TDA_PC_5.60.5_n2_RfFit0
## Random Forest
##
## 13933 samples
## 108 predictor
## 2 classes: ' <=50K', ' >50K'
##
## Pre-processing: centered (108), scaled (108)
## Resampling: Cross-Validated (3 fold)
## Summary of sample sizes: 9288, 9289, 9289
## Resampling results across tuning parameters:
##
## mtry Accuracy Kappa
## 50 0.7492283 0.4989153
## 100 0.7448503 0.4901105
## 150 0.7447069 0.4898526
## 200 0.7444197 0.4892706
## 250 0.7437021 0.4877961
## 300 0.7437738 0.4879519
## 350 0.7419076 0.4842250
## 400 0.7426253 0.4856567
## 450 0.7432713 0.4869599
## 500 0.7435585 0.4875201
## 550 0.7435584 0.4875334
## 600 0.7453528 0.4910833
## 650 0.7439889 0.4883776
## 700 0.7452810 0.4909784
## 750 0.7442761 0.4889678
## 800 0.7433431 0.4870727
## 850 0.7439174 0.4882167
## 900 0.7434148 0.4872738
## 950 0.7441326 0.4886757
## 1000 0.7422666 0.4849815
##
## Accuracy was used to select the optimal model using the largest value.
## The final value used for the model was mtry = 50.
# Per-fold Accuracy/Kappa of the selected model.
Adult_TDA_PC_5.60.5_n2_RfFit0[["resample"]]
## Accuracy Kappa Resample
## 1 0.7509150 0.5023416 Fold1
## 2 0.7508613 0.5022625 Fold3
## 3 0.7459087 0.4921418 Fold2
# Keep only the Accuracy column (the first column), as a one-column data frame.
# NOTE(review): the rows above are ordered Fold1, Fold3, Fold2; later
# subtraction against the baseline folds is positional -- verify that the
# baseline resample uses the same row order.
ad_tda_pc_5.60.5_n2_rf_fit0_re <-
  Adult_TDA_PC_5.60.5_n2_RfFit0[["resample"]]["Accuracy"]
# Structure summary of the fitted model (component lengths, classes, modes).
summary(object = Adult_TDA_PC_5.60.5_n2_RfFit0)
## Length Class Mode
## call 5 -none- call
## type 1 -none- character
## predicted 13933 factor numeric
## err.rate 1500 -none- numeric
## confusion 6 -none- numeric
## votes 27866 matrix numeric
## oob.times 13933 -none- numeric
## classes 2 -none- character
## importance 108 -none- numeric
## importanceSD 0 -none- NULL
## localImportance 0 -none- NULL
## proximity 0 -none- NULL
## ntree 1 -none- numeric
## mtry 1 -none- numeric
## forest 14 -none- list
## y 13933 factor numeric
## test 0 -none- NULL
## inbag 0 -none- NULL
## xNames 108 -none- character
## problemType 1 -none- character
## tuneValue 1 data.frame list
## obsLevels 2 -none- character
## param 1 -none- list
# Variable-importance plot for the node-2 fit, limited to the 25 top-ranked
# predictors. The feature count is now passed by name, and the title typo
# "Assited" is corrected to "Assisted".
vip(Adult_TDA_PC_5.60.5_n2_RfFit0, num_features = 25) +
  ggtitle("Adult_TDA_PCA_5.60.5_n2_RfFit TDA-Assisted RF")

# Score the held-out test set with the node-2 random forest.
pred0 <- predict(
  object = Adult_TDA_PC_5.60.5_n2_RfFit0,
  newdata = adult.one_hot_df4Test
)
# Cross-tabulate predictions against the true test labels and derive the
# accuracy / per-class statistics.
ad_tda_pc_5.60.5_n2_rf_cf0 <- confusionMatrix(
  data = pred0,
  reference = as.factor(adult.one_hot_df4Test$adult_df1)
)
ad_tda_pc_5.60.5_n2_rf_cf0
## Confusion Matrix and Statistics
##
## Reference
## Prediction <=50K >50K
## <=50K 2212 10
## >50K 5204 2342
##
## Accuracy : 0.4662
## 95% CI : (0.4563, 0.4762)
## No Information Rate : 0.7592
## P-Value [Acc > NIR] : 1
##
## Kappa : 0.1676
##
## Mcnemar's Test P-Value : <2e-16
##
## Sensitivity : 0.2983
## Specificity : 0.9957
## Pos Pred Value : 0.9955
## Neg Pred Value : 0.3104
## Prevalence : 0.7592
## Detection Rate : 0.2265
## Detection Prevalence : 0.2275
## Balanced Accuracy : 0.6470
##
## 'Positive' Class : <=50K
##
# Second print of the same confusion-matrix object; the echoed output is
# identical to the print above.
ad_tda_pc_5.60.5_n2_rf_cf0
## Confusion Matrix and Statistics
##
## Reference
## Prediction <=50K >50K
## <=50K 2212 10
## >50K 5204 2342
##
## Accuracy : 0.4662
## 95% CI : (0.4563, 0.4762)
## No Information Rate : 0.7592
## P-Value [Acc > NIR] : 1
##
## Kappa : 0.1676
##
## Mcnemar's Test P-Value : <2e-16
##
## Sensitivity : 0.2983
## Specificity : 0.9957
## Pos Pred Value : 0.9955
## Neg Pred Value : 0.3104
## Prevalence : 0.7592
## Detection Rate : 0.2265
## Detection Prevalence : 0.2275
## Balanced Accuracy : 0.6470
##
## 'Positive' Class : <=50K
##
# Overall statistics of the node-2 confusion matrix.
ad_tda_pc_5.60.5_n2_rf_cf0[["overall"]]
## Accuracy Kappa AccuracyLower AccuracyUpper AccuracyNull
## 0.4662162 0.1676317 0.4562835 0.4761691 0.7592138
## AccuracyPValue McnemarPValue
## 1.0000000 0.0000000
# Keep the test-set accuracy (first element of `overall`), selected by name.
ad_tda_pc_5.60.5_n2_rf_cf0_ov_acc <-
  ad_tda_pc_5.60.5_n2_rf_cf0[["overall"]]["Accuracy"]
# Per-class statistics (positive class ' <=50K', per the printed matrix).
ad_tda_pc_5.60.5_n2_rf_cf0[["byClass"]]
## Sensitivity Specificity Pos Pred Value
## 0.2982740 0.9957483 0.9954995
## Neg Pred Value Precision Recall
## 0.3103631 0.9954995 0.2982740
## F1 Prevalence Detection Rate
## 0.4590164 0.7592138 0.2264537
## Detection Prevalence Balanced Accuracy
## 0.2274775 0.6470112
# Keep precision, recall and F1 (elements 5 to 7 of `byClass`), by name.
ad_tda_pc_5.60.5_n2_rf_cf0_pre_rec_f1 <-
  ad_tda_pc_5.60.5_n2_rf_cf0[["byClass"]][c("Precision", "Recall", "F1")]
###### Conduct initial Bayesian tests of non-tda-assisted RF vs. tda-assisted RF classifiers
### 3-fold diff
# Row-wise (positional) difference of the per-fold accuracies: non-TDA RF minus
# node-2 TDA-assisted RF. Positive values mean the non-TDA model scored higher.
# NOTE(review): rows are matched by position, not by fold label -- confirm both
# resample tables list the folds in the same order.
diff_tda_pca_5.60.5_rf_n2_3_fold <- ad_rf_fit_re - ad_tda_pc_5.60.5_n2_rf_fit0_re
diff_tda_pca_5.60.5_rf_n2_3_fold
## Accuracy
## 1 0.1127154
## 2 0.1067328
## 3 0.1045783
## Bayesian Tests 3-fold diff
# Bayesian Sign Test
# ROPE limits are -0.01 and 0.01.
bst_tda_pca_5.60.5_rf.n2_3_fold <- BayesianSignTest(
  as.matrix(diff_tda_pca_5.60.5_rf_n2_3_fold), -0.01, 0.01
)
bst_tda_pca_5.60.5_rf.n2_3_fold
## $probLeft
## [1] 0
##
## $probRope
## [1] 0.25
##
## $probRight
## [1] 0.75
# Odds Left Bayesian Sign Test
# Ratio of the "left" posterior probability to the "right" one.
bst_tda_pca_5.60.5_rf.n2_3_fold_odds.left <-
  bst_tda_pca_5.60.5_rf.n2_3_fold[["probLeft"]] /
  bst_tda_pca_5.60.5_rf.n2_3_fold[["probRight"]]
bst_tda_pca_5.60.5_rf.n2_3_fold_odds.left
## [1] 0
# Bayesian Signed Rank Test
bsr_tda_pca_5.60.5_rf.n2_3_fold <- BayesianSignedRank(
  as.matrix(diff_tda_pca_5.60.5_rf_n2_3_fold), -0.01, 0.01
)
bsr_tda_pca_5.60.5_rf.n2_3_fold
## $winLeft
## [1] 0
##
## $winRope
## [1] 0.009066667
##
## $winRight
## [1] 0.9909333
# Bayesian Correlated Test
# Correlated t-test on the three per-fold accuracy differences, with a ROPE of
# [-0.01, 0.01]. The second argument is assumed to be the cross-validation
# correlation `rho` (signature diff, rho, rope_min, rope_max, as in
# BayesianTestsML -- confirm against the definition used in this project).
# The usual heuristic is rho = test fraction = 1 / k for k-fold CV. The fit is
# resampled with 3 folds, so rho is 1/3; the previous hard-coded 0.1 is the
# 10-fold value and understates the posterior spread.
# NOTE: the echoed $left/$rope/$right values below were produced with
# rho = 0.1 and need to be regenerated.
bct_tda_pca_5.60.5_rf.n2_3_fold <- correlatedBayesianTtest(
  as.matrix(diff_tda_pca_5.60.5_rf_n2_3_fold),
  1 / 3,
  -0.01,
  0.01
)
bct_tda_pca_5.60.5_rf.n2_3_fold
## $left
## [1] 0.0002833915
##
## $rope
## [1] 0.0001273035
##
## $right
## [1] 0.9995893
# ROPE plot: share of the interval of the fold differences that lies inside
# the region of practical equivalence [-0.01, 0.01].
plot(
  rope(
    as.matrix(diff_tda_pca_5.60.5_rf_n2_3_fold),
    range = c(-0.01, 0.01)
  )
)

#BayesFactor
#bf_tda_pca_5.60.5_rf.n2_3_fold = ttestBF(x = as.matrix(diff_tda_pca_5.60.5_rf_n2_3_fold))
#bf_tda_pca_5.60.5_rf.n2_3_fold
#t_test
# Frequentist counterpart: one-sample, two-sided t-test of the mean fold
# difference against zero (mu = 0 is the default, written out for clarity).
t.test(as.matrix(diff_tda_pca_5.60.5_rf_n2_3_fold), mu = 0)
##
## One Sample t-test
##
## data: as.matrix(diff_tda_pca_5.60.5_rf_n2_3_fold)
## t = 44.373, df = 2, p-value = 0.0005075
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
## 0.09753574 0.11848190
## sample estimates:
## mean of x
## 0.1080088
### Test set diff
# Difference in held-out accuracy: rf_cf_ov_acc (presumably the non-TDA RF
# test accuracy, defined elsewhere) minus the node-2 TDA-assisted RF accuracy.
diff_tda_pca_5.60.5_rf.n2_test <- rf_cf_ov_acc - ad_tda_pc_5.60.5_n2_rf_cf0_ov_acc
diff_tda_pca_5.60.5_rf.n2_test
## Accuracy
## 0.3927109
## Bayesian Tests Test set diff
# NOTE(review): every test in this section receives a single difference (see
# the one-value output above), so the results below rest on n = 1.
# Bayesian Sign Test
bst_tda_pca_5.60.5_rf.n2_test <- BayesianSignTest(
  as.matrix(diff_tda_pca_5.60.5_rf.n2_test), -0.01, 0.01
)
bst_tda_pca_5.60.5_rf.n2_test
## $probLeft
## [1] 0
##
## $probRope
## [1] 0.5
##
## $probRight
## [1] 0.5
# Odds Left Bayesian Sign Test
# Ratio of the "left" posterior probability to the "right" one.
bst_tda_pca_5.60.5_rf.n2_test_odds.left <-
  bst_tda_pca_5.60.5_rf.n2_test[["probLeft"]] /
  bst_tda_pca_5.60.5_rf.n2_test[["probRight"]]
bst_tda_pca_5.60.5_rf.n2_test_odds.left
## [1] 0
# Bayesian Signed Rank Test
bsr_tda_pca_5.60.5_rf.n2_test <- BayesianSignedRank(
  as.matrix(diff_tda_pca_5.60.5_rf.n2_test), -0.01, 0.01
)
bsr_tda_pca_5.60.5_rf.n2_test
## $winLeft
## [1] 0
##
## $winRope
## [1] 0.1582
##
## $winRight
## [1] 0.8418
# Bayesian Correlated Test
# The echoed result is NA for all three regions -- presumably because no
# standard deviation can be computed from a single value (TODO confirm).
bct_tda_pca_5.60.5_rf.n2_test <- correlatedBayesianTtest(
  as.matrix(diff_tda_pca_5.60.5_rf.n2_test), 0.1, -0.01, 0.01
)
bct_tda_pca_5.60.5_rf.n2_test
## $left
## [1] NA
##
## $rope
## [1] NA
##
## $right
## [1] NA
# Rope Plot
#plot(rope(as.matrix(diff_tda_pca_5.60.5_rf.n2_test),c(-0.01,0.01)))
#BayesFactor
#bf_tda_pca_5.60.5_rf.n2_test = ttestBF(x = as.matrix(diff_tda_pca_5.60.5_rf.n2_test)) #bf_tda_pca_5.60.5_rf.n2_test
#t_test
#t.test(as.matrix(diff_tda_pca_5.60.5_rf.n2_test))
##Node3
# Tune a random forest on the node-3 training set with caret. Resampling and
# the mtry grid come from fitControl and rfGrid, which are defined elsewhere
# in the file. Predictors are centered and scaled; the model is selected on
# Accuracy.
# Fix: randomForest's argument is lower-case `importance`. R argument names
# are case-sensitive, so `Importance = T` was swallowed by `...` and ignored
# (the fitted model's summary reports importanceSD as NULL). `TRUE` replaces
# `T`, which is an ordinary, reassignable variable.
# NOTE(review): the run logs "invalid mtry: reset to within valid range" and
# the fit reports 108 predictors while the tuning table lists mtry up to 1000;
# consider capping rfGrid at the number of predictors.
Adult_TDA_PC_5.60.5_n3_RfFit0 <- train(
  as.factor(adult_df1) ~ .,
  data = tda.m_adult_5.60.5.n3.vec,
  importance = TRUE,
  method = "rf",
  trControl = fitControl,
  tuneGrid = rfGrid,
  preProc = c("center", "scale"),
  metric = "Accuracy"
)
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands, V14.Honduras
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands, V14.Honduras
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands, V14.Honduras
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands, V14.Honduras
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands, V14.Honduras
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands, V14.Honduras
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands, V14.Honduras
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands, V14.Honduras
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands, V14.Honduras
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands, V14.Honduras
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands, V14.Honduras
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands, V14.Honduras
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands, V14.Honduras
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands, V14.Honduras
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands, V14.Honduras
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands, V14.Honduras
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands, V14.Honduras
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands, V14.Honduras
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands, V14.Honduras
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands, V14.Honduras
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V7.Priv.house.serv,
## V14.Holand.Netherlands
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V7.Priv.house.serv,
## V14.Holand.Netherlands
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V7.Priv.house.serv,
## V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V7.Priv.house.serv,
## V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V7.Priv.house.serv,
## V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V7.Priv.house.serv,
## V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V7.Priv.house.serv,
## V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V7.Priv.house.serv,
## V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V7.Priv.house.serv,
## V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V7.Priv.house.serv,
## V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V7.Priv.house.serv,
## V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V7.Priv.house.serv,
## V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V7.Priv.house.serv,
## V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V7.Priv.house.serv,
## V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V7.Priv.house.serv,
## V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V7.Priv.house.serv,
## V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V7.Priv.house.serv,
## V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V7.Priv.house.serv,
## V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V7.Priv.house.serv,
## V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V7.Priv.house.serv,
## V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands
# Show the tuning summary of the node-3 random-forest fit
print(Adult_TDA_PC_5.60.5_n3_RfFit0)
## Random Forest
##
## 15744 samples
## 108 predictor
## 2 classes: ' <=50K', ' >50K'
##
## Pre-processing: centered (108), scaled (108)
## Resampling: Cross-Validated (3 fold)
## Summary of sample sizes: 10495, 10496, 10497
## Resampling results across tuning parameters:
##
## mtry Accuracy Kappa
## 50 0.8067840 0.4308487
## 100 0.8041797 0.4259349
## 150 0.8035445 0.4243874
## 200 0.8021473 0.4211533
## 250 0.8041162 0.4252953
## 300 0.8037350 0.4249850
## 350 0.8037352 0.4243853
## 400 0.8027825 0.4231159
## 450 0.8034812 0.4241709
## 500 0.8017024 0.4181958
## 550 0.8031636 0.4238124
## 600 0.8041162 0.4257059
## 650 0.8032268 0.4229797
## 700 0.8039256 0.4254003
## 750 0.8042432 0.4270190
## 800 0.8028459 0.4233487
## 850 0.8037352 0.4249111
## 900 0.8050052 0.4286000
## 950 0.8021472 0.4207175
## 1000 0.8037986 0.4250153
##
## Accuracy was used to select the optimal model using the largest value.
## The final value used for the model was mtry = 50.
# Per-fold resampling metrics of the node-3 fit
print(Adult_TDA_PC_5.60.5_n3_RfFit0[["resample"]])
## Accuracy Kappa Resample
## 1 0.8016765 0.4229297 Fold1
## 2 0.8088431 0.4303138 Fold3
## 3 0.8098323 0.4393027 Fold2
# Keep only the first column (Accuracy) as a one-column data frame; it is the
# input to the fold-wise comparison.
# NOTE(review): rows are stored in the order printed here (Fold1, Fold3,
# Fold2). Anything paired with this object by position must share that order;
# confirm against the baseline's resample table.
ad_tda_pc_5.60.5_n3_rf_fit0_re <-
  Adult_TDA_PC_5.60.5_n3_RfFit0[["resample"]][1]
print(summary(Adult_TDA_PC_5.60.5_n3_RfFit0))
## Length Class Mode
## call 5 -none- call
## type 1 -none- character
## predicted 15744 factor numeric
## err.rate 1500 -none- numeric
## confusion 6 -none- numeric
## votes 31488 matrix numeric
## oob.times 15744 -none- numeric
## classes 2 -none- character
## importance 108 -none- numeric
## importanceSD 0 -none- NULL
## localImportance 0 -none- NULL
## proximity 0 -none- NULL
## ntree 1 -none- numeric
## mtry 1 -none- numeric
## forest 14 -none- list
## y 15744 factor numeric
## test 0 -none- NULL
## inbag 0 -none- NULL
## xNames 108 -none- character
## problemType 1 -none- character
## tuneValue 1 data.frame list
## obsLevels 2 -none- character
## param 1 -none- list
# Variable-importance plot for the node-3 fit, limited to 25 features.
# Fix: corrected the misspelled plot title ("Assited" -> "Assisted").
vip(Adult_TDA_PC_5.60.5_n3_RfFit0, 25) +
  ggtitle("Adult_TDA_PCA_5.60.5_n3_RfFit TDA-Assisted RF")

# Score the held-out test data with the node-3 model
pred0 <- predict(
  Adult_TDA_PC_5.60.5_n3_RfFit0,
  newdata = adult.one_hot_df4Test
)
# Compare the predictions with the test labels (coerced to a factor) to
# assess model performance on the test data
ad_tda_pc_5.60.5_n3_rf_cf0 <- confusionMatrix(
  data = pred0,
  as.factor(adult.one_hot_df4Test$adult_df1)
)
print(ad_tda_pc_5.60.5_n3_rf_cf0)
## Confusion Matrix and Statistics
##
## Reference
## Prediction <=50K >50K
## <=50K 5030 868
## >50K 2386 1484
##
## Accuracy : 0.6669
## 95% CI : (0.6574, 0.6762)
## No Information Rate : 0.7592
## P-Value [Acc > NIR] : 1
##
## Kappa : 0.2534
##
## Mcnemar's Test P-Value : <2e-16
##
## Sensitivity : 0.6783
## Specificity : 0.6310
## Pos Pred Value : 0.8528
## Neg Pred Value : 0.3835
## Prevalence : 0.7592
## Detection Rate : 0.5149
## Detection Prevalence : 0.6038
## Balanced Accuracy : 0.6546
##
## 'Positive' Class : <=50K
##
# Second print of the node-3 confusion matrix (same object as printed earlier)
print(ad_tda_pc_5.60.5_n3_rf_cf0)
## Confusion Matrix and Statistics
##
## Reference
## Prediction <=50K >50K
## <=50K 5030 868
## >50K 2386 1484
##
## Accuracy : 0.6669
## 95% CI : (0.6574, 0.6762)
## No Information Rate : 0.7592
## P-Value [Acc > NIR] : 1
##
## Kappa : 0.2534
##
## Mcnemar's Test P-Value : <2e-16
##
## Sensitivity : 0.6783
## Specificity : 0.6310
## Pos Pred Value : 0.8528
## Neg Pred Value : 0.3835
## Prevalence : 0.7592
## Detection Rate : 0.5149
## Detection Prevalence : 0.6038
## Balanced Accuracy : 0.6546
##
## 'Positive' Class : <=50K
##
# Overall statistics of the node-3 confusion matrix
print(ad_tda_pc_5.60.5_n3_rf_cf0$overall)
## Accuracy Kappa AccuracyLower AccuracyUpper AccuracyNull
## 6.668714e-01 2.533811e-01 6.574245e-01 6.762187e-01 7.592138e-01
## AccuracyPValue McnemarPValue
## 1.000000e+00 8.053881e-156
# Keep the overall accuracy; "Accuracy" is the first element of $overall, as
# the printout above shows
ad_tda_pc_5.60.5_n3_rf_cf0_ov_acc <-
  ad_tda_pc_5.60.5_n3_rf_cf0$overall["Accuracy"]
# Per-class statistics
print(ad_tda_pc_5.60.5_n3_rf_cf0$byClass)
## Sensitivity Specificity Pos Pred Value
## 0.6782632 0.6309524 0.8528315
## Neg Pred Value Precision Recall
## 0.3834625 0.8528315 0.6782632
## F1 Prevalence Detection Rate
## 0.7555956 0.7592138 0.5149468
## Detection Prevalence Balanced Accuracy
## 0.6038084 0.6546078
# Precision, Recall and F1: elements 5 to 7 of $byClass, selected by name
ad_tda_pc_5.60.5_n3_rf_cf0_pre_rec_f1 <-
  ad_tda_pc_5.60.5_n3_rf_cf0$byClass[c("Precision", "Recall", "F1")]
###### Conduct initial Bayesian tests of non-tda-assisted RF vs. tda-assisted RF classifiers
### 3-fold diff
# Fold-wise accuracy difference: ad_rf_fit_re minus the node-3 accuracies.
# NOTE(review): the subtraction pairs rows by position, not by fold label;
# confirm both operands list folds in the same order.
diff_tda_pca_5.60.5_rf_n3_3_fold <-
  ad_rf_fit_re - ad_tda_pc_5.60.5_n3_rf_fit0_re
print(diff_tda_pca_5.60.5_rf_n3_3_fold)
## Accuracy
## 1 0.06195387
## 2 0.04875096
## 3 0.04065465
## Bayesian Tests 3-fold diff
# Bayesian Sign Test on the node-3 fold-wise differences (bounds -0.01, 0.01)
bst_tda_pca_5.60.5_rf.n3_3_fold <- BayesianSignTest(
  as.matrix(diff_tda_pca_5.60.5_rf_n3_3_fold),
  -0.01,
  0.01
)
print(bst_tda_pca_5.60.5_rf.n3_3_fold)
## $probLeft
## [1] 0
##
## $probRope
## [1] 0.25
##
## $probRight
## [1] 0.75
# Odds Left Bayesian Sign Test: probLeft divided by probRight
bst_tda_pca_5.60.5_rf.n3_3_fold_odds.left <-
  bst_tda_pca_5.60.5_rf.n3_3_fold[["probLeft"]] /
  bst_tda_pca_5.60.5_rf.n3_3_fold[["probRight"]]
print(bst_tda_pca_5.60.5_rf.n3_3_fold_odds.left)
## [1] 0
# Bayesian Signed Rank Test
bsr_tda_pca_5.60.5_rf.n3_3_fold <- BayesianSignedRank(
  as.matrix(diff_tda_pca_5.60.5_rf_n3_3_fold),
  -0.01,
  0.01
)
print(bsr_tda_pca_5.60.5_rf.n3_3_fold)
## $winLeft
## [1] 0
##
## $winRope
## [1] 0.0095
##
## $winRight
## [1] 0.9905
# Bayesian Correlated Test on the node-3 fold-wise differences.
# Positional arguments after the data: 0.1, then -0.01 and 0.01
# (presumably the correlation and the ROPE bounds; confirm against the
# definition of correlatedBayesianTtest).
bct_tda_pca_5.60.5_rf.n3_3_fold <- correlatedBayesianTtest(
  as.matrix(diff_tda_pca_5.60.5_rf_n3_3_fold),
  0.1,
  -0.01,
  0.01
)
# Fix: print the node-3 result that was just computed. The original printed
# bct_tda_pca_5.60.5_rf.n2_3_fold (node 2), so the rendered values following
# this statement are the node-2 numbers and must be regenerated.
bct_tda_pca_5.60.5_rf.n3_3_fold
## $left
## [1] 0.0002833915
##
## $rope
## [1] 0.0001273035
##
## $right
## [1] 0.9995893
# ROPE plot of the node-3 3-fold differences, interval c(-0.01, 0.01)
plot(
  rope(
    as.matrix(diff_tda_pca_5.60.5_rf_n3_3_fold),
    c(-0.01, 0.01)
  )
)

# Bayes factor (disabled)
#bf_tda_pca_5.60.5_rf.n3_3_fold = ttestBF(x = as.matrix(diff_tda_pca_5.60.5_rf_n3_3_fold))
#bf_tda_pca_5.60.5_rf.n3_3_fold
# One-sample t-test on the same differences. The argument expression is kept
# verbatim because t.test echoes it in the "data:" line of its printout.
print(t.test(as.matrix(diff_tda_pca_5.60.5_rf_n3_3_fold)))
##
## One Sample t-test
##
## data: as.matrix(diff_tda_pca_5.60.5_rf_n3_3_fold)
## t = 8.1282, df = 2, p-value = 0.0148
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
## 0.02374582 0.07716050
## sample estimates:
## mean of x
## 0.05045316
### Test set diff
# Accuracy gap on the test set for node 3: rf_cf_ov_acc minus the
# TDA-assisted model's overall accuracy. The result is a single named value.
# NOTE(review): every test below receives this one observation, and the
# rendered correlated-test output is NA; confirm single-value tests are
# intended.
diff_tda_pca_5.60.5_rf.n3_test <-
  rf_cf_ov_acc - ad_tda_pc_5.60.5_n3_rf_cf0_ov_acc
print(diff_tda_pca_5.60.5_rf.n3_test)
## Accuracy
## 0.1920557
## Bayesian Tests Test set diff
# Bayesian Sign Test
bst_tda_pca_5.60.5_rf.n3_test <- BayesianSignTest(
  as.matrix(diff_tda_pca_5.60.5_rf.n3_test),
  -0.01,
  0.01
)
print(bst_tda_pca_5.60.5_rf.n3_test)
## $probLeft
## [1] 0
##
## $probRope
## [1] 0.5
##
## $probRight
## [1] 0.5
# Odds Left Bayesian Sign Test: probLeft divided by probRight
bst_tda_pca_5.60.5_rf.n3_test_odds.left <-
  bst_tda_pca_5.60.5_rf.n3_test[["probLeft"]] /
  bst_tda_pca_5.60.5_rf.n3_test[["probRight"]]
print(bst_tda_pca_5.60.5_rf.n3_test_odds.left)
## [1] 0
# Bayesian Signed Rank Test
bsr_tda_pca_5.60.5_rf.n3_test <- BayesianSignedRank(
  as.matrix(diff_tda_pca_5.60.5_rf.n3_test),
  -0.01,
  0.01
)
print(bsr_tda_pca_5.60.5_rf.n3_test)
## $winLeft
## [1] 0
##
## $winRope
## [1] 0.1591667
##
## $winRight
## [1] 0.8408333
# Bayesian Correlated Test
bct_tda_pca_5.60.5_rf.n3_test <- correlatedBayesianTtest(
  as.matrix(diff_tda_pca_5.60.5_rf.n3_test),
  0.1,
  -0.01,
  0.01
)
print(bct_tda_pca_5.60.5_rf.n3_test)
## $left
## [1] NA
##
## $rope
## [1] NA
##
## $right
## [1] NA
# Rope Plot
#plot(rope(diff_tda_pca_5.60.5_rf.n3_test))
#BayesFactor
#bf_tda_pca_5.60.5_rf.n3_test = ttestBF(x = as.matrix(diff_tda_pca_5.60.5_rf.n3_test)) #bf_tda_pca_5.60.5_rf.n3_test
#t_test
#t.test(as.matrix(diff_tda_pca_5.60.5_rf.n3_test))
##Node4
# Tune and fit a random forest with caret on the node-4 TDA/PCA training data.
# `importance = TRUE` is forwarded through train()'s `...` to randomForest().
# R matches argument names case-sensitively, so the earlier `Importance = T`
# never reached randomForest's `importance` argument (the summary() output of
# the fitted model shows importanceSD with length 0).
# NOTE(review): the tuning table lists mtry values from 50 to 1000 for 108
# predictors and randomForest warns "invalid mtry: reset to within valid
# range"; consider restricting rfGrid (defined elsewhere) to at most the
# number of predictors.
Adult_TDA_PC_5.60.5_n4_RfFit0 <- train(
  as.factor(adult_df1) ~ .,
  data = tda.m_adult_5.60.5.n4.vec,
  importance = TRUE,
  method = "rf",
  trControl = fitControl,
  tuneGrid = rfGrid,
  preProc = c("center", "scale"),
  metric = "Accuracy"
)
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
# Print the caret tuning summary (resampled Accuracy/Kappa per mtry) for node 4
Adult_TDA_PC_5.60.5_n4_RfFit0
## Random Forest
##
## 19829 samples
## 108 predictor
## 2 classes: ' <=50K', ' >50K'
##
## Pre-processing: centered (108), scaled (108)
## Resampling: Cross-Validated (3 fold)
## Summary of sample sizes: 13220, 13219, 13219
## Resampling results across tuning parameters:
##
## mtry Accuracy Kappa
## 50 0.9482072 0.4322754
## 100 0.9476525 0.4311878
## 150 0.9478542 0.4352071
## 200 0.9469969 0.4260941
## 250 0.9476525 0.4301942
## 300 0.9473499 0.4301530
## 350 0.9469969 0.4257237
## 400 0.9472995 0.4313980
## 450 0.9471482 0.4269776
## 500 0.9474003 0.4300012
## 550 0.9471482 0.4285603
## 600 0.9474003 0.4305603
## 650 0.9475012 0.4293020
## 700 0.9475516 0.4338274
## 750 0.9470473 0.4280267
## 800 0.9469465 0.4259176
## 850 0.9471482 0.4270210
## 900 0.9475516 0.4312180
## 950 0.9471986 0.4288736
## 1000 0.9472491 0.4281409
##
## Accuracy was used to select the optimal model using the largest value.
## The final value used for the model was mtry = 50.
# Per-fold resampling metrics of the selected node-4 model
Adult_TDA_PC_5.60.5_n4_RfFit0[["resample"]]
## Accuracy Kappa Resample
## 1 0.9490089 0.4261855 Fold1
## 2 0.9456884 0.4148155 Fold3
## 3 0.9499244 0.4558253 Fold2
# Keep only the per-fold Accuracy column, as a one-column data frame.
# NOTE(review): rows are listed as Fold1, Fold3, Fold2; any later row-wise
# subtraction pairs folds by position -- confirm the other operand uses the
# same order.
ad_tda_pc_5.60.5_n4_rf_fit0_re <- Adult_TDA_PC_5.60.5_n4_RfFit0[["resample"]]["Accuracy"]
# Structure of the fitted object as reported by summary()
summary(Adult_TDA_PC_5.60.5_n4_RfFit0)
## Length Class Mode
## call 5 -none- call
## type 1 -none- character
## predicted 19829 factor numeric
## err.rate 1500 -none- numeric
## confusion 6 -none- numeric
## votes 39658 matrix numeric
## oob.times 19829 -none- numeric
## classes 2 -none- character
## importance 108 -none- numeric
## importanceSD 0 -none- NULL
## localImportance 0 -none- NULL
## proximity 0 -none- NULL
## ntree 1 -none- numeric
## mtry 1 -none- numeric
## forest 14 -none- list
## y 19829 factor numeric
## test 0 -none- NULL
## inbag 0 -none- NULL
## xNames 108 -none- character
## problemType 1 -none- character
## tuneValue 1 data.frame list
## obsLevels 2 -none- character
## param 1 -none- list
# Variable-importance plot of the top 25 features for the node-4 model
vip(Adult_TDA_PC_5.60.5_n4_RfFit0, num_features = 25) +
  ggtitle("Adult_TDA_PCA_5.60.5_n4_RfFit TDA-Assited RF")

# Score the test set with the node-4 model
pred0 <- predict(Adult_TDA_PC_5.60.5_n4_RfFit0, newdata = adult.one_hot_df4Test)
# Compare the predictions with the test-set labels and print the result
ad_tda_pc_5.60.5_n4_rf_cf0 <- confusionMatrix(
  data = pred0,
  reference = as.factor(adult.one_hot_df4Test$adult_df1)
)
ad_tda_pc_5.60.5_n4_rf_cf0
## Confusion Matrix and Statistics
##
## Reference
## Prediction <=50K >50K
## <=50K 7413 1614
## >50K 3 738
##
## Accuracy : 0.8345
## 95% CI : (0.8269, 0.8418)
## No Information Rate : 0.7592
## P-Value [Acc > NIR] : < 2.2e-16
##
## Kappa : 0.409
##
## Mcnemar's Test P-Value : < 2.2e-16
##
## Sensitivity : 0.9996
## Specificity : 0.3138
## Pos Pred Value : 0.8212
## Neg Pred Value : 0.9960
## Prevalence : 0.7592
## Detection Rate : 0.7589
## Detection Prevalence : 0.9241
## Balanced Accuracy : 0.6567
##
## 'Positive' Class : <=50K
##
# NOTE(review): second print of the same confusion matrix; the output that
# follows is identical to the print immediately after it was created.
ad_tda_pc_5.60.5_n4_rf_cf0
## Confusion Matrix and Statistics
##
## Reference
## Prediction <=50K >50K
## <=50K 7413 1614
## >50K 3 738
##
## Accuracy : 0.8345
## 95% CI : (0.8269, 0.8418)
## No Information Rate : 0.7592
## P-Value [Acc > NIR] : < 2.2e-16
##
## Kappa : 0.409
##
## Mcnemar's Test P-Value : < 2.2e-16
##
## Sensitivity : 0.9996
## Specificity : 0.3138
## Pos Pred Value : 0.8212
## Neg Pred Value : 0.9960
## Prevalence : 0.7592
## Detection Rate : 0.7589
## Detection Prevalence : 0.9241
## Balanced Accuracy : 0.6567
##
## 'Positive' Class : <=50K
##
# Overall test-set statistics of the node-4 confusion matrix
ad_tda_pc_5.60.5_n4_rf_cf0[["overall"]]
## Accuracy Kappa AccuracyLower AccuracyUpper AccuracyNull
## 8.344595e-01 4.090247e-01 8.269383e-01 8.417807e-01 7.592138e-01
## AccuracyPValue McnemarPValue
## 1.161054e-73 0.000000e+00
# Keep the overall accuracy (first entry of `overall`) as a named scalar
ad_tda_pc_5.60.5_n4_rf_cf0_ov_acc <- ad_tda_pc_5.60.5_n4_rf_cf0[["overall"]]["Accuracy"]
# Per-class statistics for the positive class
ad_tda_pc_5.60.5_n4_rf_cf0[["byClass"]]
## Sensitivity Specificity Pos Pred Value
## 0.9995955 0.3137755 0.8212031
## Neg Pred Value Precision Recall
## 0.9959514 0.8212031 0.9995955
## F1 Prevalence Detection Rate
## 0.9016603 0.7592138 0.7589066
## Detection Prevalence Balanced Accuracy
## 0.9241400 0.6566855
# Precision, Recall and F1 (entries 5 to 7 of `byClass`)
ad_tda_pc_5.60.5_n4_rf_cf0_pre_rec_f1 <- ad_tda_pc_5.60.5_n4_rf_cf0[["byClass"]][c("Precision", "Recall", "F1")]
###### Conduct initial Bayesian tests of non-tda-assisted RF vs. tda-assisted RF classifiers
### 3-fold diff
# Fold-wise accuracy difference: ad_rf_fit_re minus the node-4 TDA-assisted RF.
# NOTE(review): the two operands come from separate train() runs and are
# paired by row position only -- confirm the folds are comparable.
diff_tda_pca_5.60.5_rf_n4_3_fold <- ad_rf_fit_re - ad_tda_pc_5.60.5_n4_rf_fit0_re
diff_tda_pca_5.60.5_rf_n4_3_fold
## Accuracy
## 1 -0.08537855
## 2 -0.08809425
## 3 -0.09943739
## Bayesian Tests 3-fold diff
# Bayesian sign test on the node-4 fold-wise differences, ROPE = [-0.01, 0.01]
bst_tda_pca_5.60.5_rf.n4_3_fold <- BayesianSignTest(as.matrix(diff_tda_pca_5.60.5_rf_n4_3_fold), -0.01, 0.01)
bst_tda_pca_5.60.5_rf.n4_3_fold
## $probLeft
## [1] 0.75
##
## $probRope
## [1] 0.25
##
## $probRight
## [1] 0
# Odds of "left" versus "right" from the Bayesian sign test; the result is Inf
# here because probRight is 0
bst_tda_pca_5.60.5_rf.n4_3_fold_odds.left <- with(bst_tda_pca_5.60.5_rf.n4_3_fold, probLeft / probRight)
bst_tda_pca_5.60.5_rf.n4_3_fold_odds.left
## [1] Inf
# Bayesian signed-rank test on the same differences, ROPE = [-0.01, 0.01]
bsr_tda_pca_5.60.5_rf.n4_3_fold <- BayesianSignedRank(as.matrix(diff_tda_pca_5.60.5_rf_n4_3_fold), -0.01, 0.01)
bsr_tda_pca_5.60.5_rf.n4_3_fold
## $winLeft
## [1] 0.9915667
##
## $winRope
## [1] 0.008433333
##
## $winRight
## [1] 0
# Bayesian correlated t-test on the node-4 fold-wise differences with ROPE
# [-0.01, 0.01]; the 0.1 is presumably the assumed fold correlation -- TODO confirm
bct_tda_pca_5.60.5_rf.n4_3_fold <- correlatedBayesianTtest(as.matrix(diff_tda_pca_5.60.5_rf_n4_3_fold), 0.1, -0.01, 0.01)
bct_tda_pca_5.60.5_rf.n4_3_fold
## $left
## [1] 0.9981255
##
## $rope
## [1] 0.00066663
##
## $right
## [1] 0.001207875
# ROPE plot of the node-4 fold-wise accuracy differences, ROPE = [-0.01, 0.01]
plot(rope(as.matrix(diff_tda_pca_5.60.5_rf_n4_3_fold), c(-0.01, 0.01)))

# Bayes factor (disabled)
#bf_tda_pca_5.60.5_rf.n4_3_fold = ttestBF(x = as.matrix(diff_tda_pca_5.60.5_rf_n4_3_fold))
#bf_tda_pca_5.60.5_rf.n4_3_fold
# Classical one-sample t-test of the fold-wise differences against a mean of 0
t.test(as.matrix(diff_tda_pca_5.60.5_rf_n4_3_fold), mu = 0)
##
## One Sample t-test
##
## data: as.matrix(diff_tda_pca_5.60.5_rf_n4_3_fold)
## t = -21.128, df = 2, p-value = 0.002233
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
## -0.10949571 -0.07244441
## sample estimates:
## mean of x
## -0.09097006
### Test set diff
# Test-set accuracy difference: rf_cf_ov_acc (presumably the non-TDA RF) minus
# the node-4 TDA-assisted RF. This is a single number, not a vector.
diff_tda_pca_5.60.5_rf.n4_test <- rf_cf_ov_acc - ad_tda_pc_5.60.5_n4_rf_cf0_ov_acc
diff_tda_pca_5.60.5_rf.n4_test
## Accuracy
## 0.02446765
## Bayesian Tests Test set diff
# Bayesian sign test on the single test-set difference, ROPE = [-0.01, 0.01]
bst_tda_pca_5.60.5_rf.n4_test <- BayesianSignTest(as.matrix(diff_tda_pca_5.60.5_rf.n4_test), -0.01, 0.01)
bst_tda_pca_5.60.5_rf.n4_test
## $probLeft
## [1] 0
##
## $probRope
## [1] 0.5
##
## $probRight
## [1] 0.5
# Odds of "left" versus "right" from the Bayesian sign test (probLeft / probRight)
bst_tda_pca_5.60.5_rf.n4_test_odds.left <- with(bst_tda_pca_5.60.5_rf.n4_test, probLeft / probRight)
bst_tda_pca_5.60.5_rf.n4_test_odds.left
## [1] 0
# Bayesian signed-rank test on the same single difference, ROPE = [-0.01, 0.01]
bsr_tda_pca_5.60.5_rf.n4_test <- BayesianSignedRank(as.matrix(diff_tda_pca_5.60.5_rf.n4_test), -0.01, 0.01)
bsr_tda_pca_5.60.5_rf.n4_test
## $winLeft
## [1] 0
##
## $winRope
## [1] 0.1622
##
## $winRight
## [1] 0.8378
# Bayesian correlated t-test on the test-set difference, ROPE = [-0.01, 0.01].
# NOTE(review): the input is a single value and every component prints as NA;
# presumably the test needs more than one observation -- confirm.
bct_tda_pca_5.60.5_rf.n4_test <- correlatedBayesianTtest(as.matrix(diff_tda_pca_5.60.5_rf.n4_test), 0.1, -0.01, 0.01)
bct_tda_pca_5.60.5_rf.n4_test
## $left
## [1] NA
##
## $rope
## [1] NA
##
## $right
## [1] NA
# Rope Plot
#plot(rope(diff_tda_pca_5.60.5_rf.n4_test))
#BayesFactor
#bf_tda_pca_5.60.5_rf.n4_test = ttestBF(x = as.matrix(diff_tda_pca_5.60.5_rf.n4_test)) #bf_tda_pca_5.60.5_rf.n4_test
#t_test
#t.test(as.matrix(diff_tda_pca_5.60.5_rf.n4_test))
##Node5
# Tune and fit a random forest with caret on the node-5 TDA/PCA training data.
# `importance = TRUE` is forwarded through train()'s `...` to randomForest().
# R matches argument names case-sensitively, so the earlier `Importance = T`
# never reached randomForest's `importance` argument (the summary() output of
# the fitted model shows importanceSD with length 0).
# NOTE(review): the tuning table lists mtry values from 50 to 1000 for 108
# predictors and randomForest warns "invalid mtry: reset to within valid
# range"; consider restricting rfGrid (defined elsewhere) to at most the
# number of predictors.
Adult_TDA_PC_5.60.5_n5_RfFit0 <- train(
  as.factor(adult_df1) ~ .,
  data = tda.m_adult_5.60.5.n5.vec,
  importance = TRUE,
  method = "rf",
  trControl = fitControl,
  tuneGrid = rfGrid,
  preProc = c("center", "scale"),
  metric = "Accuracy"
)
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
# Print the caret tuning summary (resampled Accuracy/Kappa per mtry) for node 5
Adult_TDA_PC_5.60.5_n5_RfFit0
## Random Forest
##
## 16508 samples
## 108 predictor
## 2 classes: ' <=50K', ' >50K'
##
## Pre-processing: centered (108), scaled (108)
## Resampling: Cross-Validated (3 fold)
## Summary of sample sizes: 11005, 11006, 11005
## Resampling results across tuning parameters:
##
## mtry Accuracy Kappa
## 50 0.9930943 0.2856353
## 100 0.9929731 0.2966272
## 150 0.9928520 0.2924513
## 200 0.9927308 0.2796054
## 250 0.9928520 0.2924513
## 300 0.9928520 0.2924513
## 350 0.9927914 0.2833461
## 400 0.9927914 0.2910408
## 450 0.9928520 0.2924513
## 500 0.9927914 0.2904632
## 550 0.9927914 0.2833461
## 600 0.9928520 0.2924513
## 650 0.9928520 0.2924513
## 700 0.9927914 0.2904632
## 750 0.9929125 0.2945048
## 800 0.9927308 0.2814734
## 850 0.9928520 0.2924513
## 900 0.9928520 0.2924513
## 950 0.9928520 0.2924513
## 1000 0.9927914 0.2815935
##
## Accuracy was used to select the optimal model using the largest value.
## The final value used for the model was mtry = 50.
# Per-fold resampling metrics of the selected node-5 model
Adult_TDA_PC_5.60.5_n5_RfFit0[["resample"]]
## Accuracy Kappa Resample
## 1 0.9923678 0.2197383 Fold1
## 2 0.9934581 0.3313532 Fold3
## 3 0.9934569 0.3058143 Fold2
# Keep only the per-fold Accuracy column, as a one-column data frame.
# NOTE(review): rows are listed as Fold1, Fold3, Fold2; any later row-wise
# subtraction pairs folds by position -- confirm the other operand uses the
# same order.
ad_tda_pc_5.60.5_n5_rf_fit0_re <- Adult_TDA_PC_5.60.5_n5_RfFit0[["resample"]]["Accuracy"]
# Structure of the fitted object as reported by summary()
summary(Adult_TDA_PC_5.60.5_n5_RfFit0)
## Length Class Mode
## call 5 -none- call
## type 1 -none- character
## predicted 16508 factor numeric
## err.rate 1500 -none- numeric
## confusion 6 -none- numeric
## votes 33016 matrix numeric
## oob.times 16508 -none- numeric
## classes 2 -none- character
## importance 108 -none- numeric
## importanceSD 0 -none- NULL
## localImportance 0 -none- NULL
## proximity 0 -none- NULL
## ntree 1 -none- numeric
## mtry 1 -none- numeric
## forest 14 -none- list
## y 16508 factor numeric
## test 0 -none- NULL
## inbag 0 -none- NULL
## xNames 108 -none- character
## problemType 1 -none- character
## tuneValue 1 data.frame list
## obsLevels 2 -none- character
## param 1 -none- list
# Variable-importance plot of the top 25 features for the node-5 model
vip(Adult_TDA_PC_5.60.5_n5_RfFit0, num_features = 25) +
  ggtitle("Adult_TDA_PCA_5.60.5_n5_RfFit TDA-Assited RF")

# Score the test set with the node-5 model
pred0 <- predict(Adult_TDA_PC_5.60.5_n5_RfFit0, newdata = adult.one_hot_df4Test)
# Compare the predictions with the test-set labels and print the result
ad_tda_pc_5.60.5_n5_rf_cf0 <- confusionMatrix(
  data = pred0,
  reference = as.factor(adult.one_hot_df4Test$adult_df1)
)
ad_tda_pc_5.60.5_n5_rf_cf0
## Confusion Matrix and Statistics
##
## Reference
## Prediction <=50K >50K
## <=50K 7412 2057
## >50K 4 295
##
## Accuracy : 0.789
## 95% CI : (0.7808, 0.7971)
## No Information Rate : 0.7592
## P-Value [Acc > NIR] : 1.522e-12
##
## Kappa : 0.1779
##
## Mcnemar's Test P-Value : < 2.2e-16
##
## Sensitivity : 0.9995
## Specificity : 0.1254
## Pos Pred Value : 0.7828
## Neg Pred Value : 0.9866
## Prevalence : 0.7592
## Detection Rate : 0.7588
## Detection Prevalence : 0.9694
## Balanced Accuracy : 0.5624
##
## 'Positive' Class : <=50K
##
# NOTE(review): second print of the same confusion matrix; the output that
# follows is identical to the print immediately after it was created.
ad_tda_pc_5.60.5_n5_rf_cf0
## Confusion Matrix and Statistics
##
## Reference
## Prediction <=50K >50K
## <=50K 7412 2057
## >50K 4 295
##
## Accuracy : 0.789
## 95% CI : (0.7808, 0.7971)
## No Information Rate : 0.7592
## P-Value [Acc > NIR] : 1.522e-12
##
## Kappa : 0.1779
##
## Mcnemar's Test P-Value : < 2.2e-16
##
## Sensitivity : 0.9995
## Specificity : 0.1254
## Pos Pred Value : 0.7828
## Neg Pred Value : 0.9866
## Prevalence : 0.7592
## Detection Rate : 0.7588
## Detection Prevalence : 0.9694
## Balanced Accuracy : 0.5624
##
## 'Positive' Class : <=50K
##
# Overall test-set statistics of the node-5 confusion matrix
ad_tda_pc_5.60.5_n5_rf_cf0[["overall"]]
## Accuracy Kappa AccuracyLower AccuracyUpper AccuracyNull
## 7.890049e-01 1.779051e-01 7.807768e-01 7.970604e-01 7.592138e-01
## AccuracyPValue McnemarPValue
## 1.522100e-12 0.000000e+00
# Keep the overall accuracy (first entry of `overall`) as a named scalar
ad_tda_pc_5.60.5_n5_rf_cf0_ov_acc <- ad_tda_pc_5.60.5_n5_rf_cf0[["overall"]]["Accuracy"]
# Per-class statistics for the positive class
ad_tda_pc_5.60.5_n5_rf_cf0[["byClass"]]
## Sensitivity Specificity Pos Pred Value
## 0.9994606 0.1254252 0.7827648
## Neg Pred Value Precision Recall
## 0.9866221 0.7827648 0.9994606
## F1 Prevalence Detection Rate
## 0.8779390 0.7592138 0.7588043
## Detection Prevalence Balanced Accuracy
## 0.9693898 0.5624429
# Precision, Recall and F1 (entries 5 to 7 of `byClass`)
ad_tda_pc_5.60.5_n5_rf_cf0_pre_rec_f1 <- ad_tda_pc_5.60.5_n5_rf_cf0[["byClass"]][c("Precision", "Recall", "F1")]
###### Conduct initial Bayesian tests of non-tda-assisted RF vs. tda-assisted RF classifiers
### 3-fold diff
# Fold-wise accuracy difference: ad_rf_fit_re minus the node-5 TDA-assisted RF.
# NOTE(review): the two operands come from separate train() runs and are
# paired by row position only -- confirm the folds are comparable.
diff_tda_pca_5.60.5_rf_n5_3_fold <- ad_rf_fit_re - ad_tda_pc_5.60.5_n5_rf_fit0_re
diff_tda_pca_5.60.5_rf_n5_3_fold
## Accuracy
## 1 -0.1287374
## 2 -0.1358640
## 3 -0.1429700
## Bayesian Tests 3-fold diff
# Bayesian sign test on the node-5 fold-wise differences, ROPE = [-0.01, 0.01]
bst_tda_pca_5.60.5_rf.n5_3_fold <- BayesianSignTest(as.matrix(diff_tda_pca_5.60.5_rf_n5_3_fold), -0.01, 0.01)
bst_tda_pca_5.60.5_rf.n5_3_fold
## $probLeft
## [1] 0.75
##
## $probRope
## [1] 0.25
##
## $probRight
## [1] 0
# Odds of "left" versus "right" from the Bayesian sign test; the result is Inf
# here because probRight is 0
bst_tda_pca_5.60.5_rf.n5_3_fold_odds.left <- with(bst_tda_pca_5.60.5_rf.n5_3_fold, probLeft / probRight)
bst_tda_pca_5.60.5_rf.n5_3_fold_odds.left
## [1] Inf
# Bayesian signed-rank test on the same differences, ROPE = [-0.01, 0.01]
bsr_tda_pca_5.60.5_rf.n5_3_fold <- BayesianSignedRank(as.matrix(diff_tda_pca_5.60.5_rf_n5_3_fold), -0.01, 0.01)
bsr_tda_pca_5.60.5_rf.n5_3_fold
## $winLeft
## [1] 0.9913
##
## $winRope
## [1] 0.0087
##
## $winRight
## [1] 0
# Bayesian correlated t-test on the node-5 fold-wise differences with ROPE
# [-0.01, 0.01]; the 0.1 is presumably the assumed fold correlation -- TODO confirm
bct_tda_pca_5.60.5_rf.n5_3_fold <- correlatedBayesianTtest(as.matrix(diff_tda_pca_5.60.5_rf_n5_3_fold), 0.1, -0.01, 0.01)
bct_tda_pca_5.60.5_rf.n5_3_fold
## $left
## [1] 0.9992911
##
## $rope
## [1] 0.0001808052
##
## $right
## [1] 0.0005281393
# ROPE plot of the node-5 fold-wise accuracy differences, ROPE = [-0.01, 0.01].
# The differences are converted with as.matrix() before calling rope(), as in
# the node-3 and node-4 ROPE plots; this call previously passed the one-column
# data frame directly, so the plots were not built the same way.
plot(rope(as.matrix(diff_tda_pca_5.60.5_rf_n5_3_fold), c(-0.01, 0.01)))

# Bayes factor (disabled)
#bf_tda_pca_5.60.5_rf.n5_3_fold = ttestBF(x = as.matrix(diff_tda_pca_5.60.5_rf_n5_3_fold))
#bf_tda_pca_5.60.5_rf.n5_3_fold
# Classical one-sample t-test of the fold-wise differences against a mean of 0
t.test(as.matrix(diff_tda_pca_5.60.5_rf_n5_3_fold))
##
## One Sample t-test
##
## data: as.matrix(diff_tda_pca_5.60.5_rf_n5_3_fold)
## t = -33.067, df = 2, p-value = 0.0009133
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
## -0.1535349 -0.1181793
## sample estimates:
## mean of x
## -0.1358571
### Difference on the held-out test set
# Single accuracy difference: rf_cf_ov_acc minus the TDA-assisted (PCA,
# node 5) test accuracy. All tests below therefore run on one value only.
diff_tda_pca_5.60.5_rf.n5_test <-
  rf_cf_ov_acc - ad_tda_pc_5.60.5_n5_rf_cf0_ov_acc
diff_tda_pca_5.60.5_rf.n5_test
## Accuracy
## 0.06992219
## Bayesian tests on the test-set difference
# Bayesian sign test, called with the bounds -0.01 and 0.01
bst_tda_pca_5.60.5_rf.n5_test <- BayesianSignTest(
  as.matrix(diff_tda_pca_5.60.5_rf.n5_test), -0.01, 0.01
)
bst_tda_pca_5.60.5_rf.n5_test
## $probLeft
## [1] 0
##
## $probRope
## [1] 0.5
##
## $probRight
## [1] 0.5
# Odds of "left" against "right" from the sign test
bst_tda_pca_5.60.5_rf.n5_test_odds.left <-
  bst_tda_pca_5.60.5_rf.n5_test[["probLeft"]] /
  bst_tda_pca_5.60.5_rf.n5_test[["probRight"]]
bst_tda_pca_5.60.5_rf.n5_test_odds.left
## [1] 0
# Bayesian signed-rank test, same bounds
bsr_tda_pca_5.60.5_rf.n5_test <- BayesianSignedRank(
  as.matrix(diff_tda_pca_5.60.5_rf.n5_test), -0.01, 0.01
)
bsr_tda_pca_5.60.5_rf.n5_test
## $winLeft
## [1] 0
##
## $winRope
## [1] 0.1624333
##
## $winRight
## [1] 0.8375667
# Bayesian correlated t-test; with this single difference the recorded
# result is NA for all three regions.
bct_tda_pca_5.60.5_rf.n5_test <- correlatedBayesianTtest(
  as.matrix(diff_tda_pca_5.60.5_rf.n5_test), 0.1, -0.01, 0.01
)
bct_tda_pca_5.60.5_rf.n5_test
## $left
## [1] NA
##
## $rope
## [1] NA
##
## $right
## [1] NA
# ROPE plot (disabled)
#plot(rope(diff_tda_pca_5.60.5_rf.n5_test))
# BayesFactor (disabled)
#bf_tda_pca_5.60.5_rf.n5_test = ttestBF(x = as.matrix(diff_tda_pca_5.60.5_rf.n5_test))
#bf_tda_pca_5.60.5_rf.n5_test
# t-test (disabled)
#t.test(as.matrix(diff_tda_pca_5.60.5_rf.n5_test))
## With TDA KDE filter: 5 intervals, 50% overlap, 5 bins
# NOTE(review): every object in this section is tagged 5.60.5 while the
# header above says 50% overlap -- confirm which overlap was actually used.
## Node 1
# Tune a random forest on the node-1 data with caret, selecting on Accuracy.
# `importance = TRUE` is forwarded through train()'s `...` to randomForest().
# The earlier spelling `Importance = T` did not match that (case-sensitive)
# argument, so it was silently ignored and no permutation importance was
# computed (the recorded model summary shows importanceSD as NULL).
# NOTE(review): the recorded warnings show mtry values from rfGrid being
# reset to the valid range; rfGrid is defined elsewhere in the file.
Adult_TDA_KDE_5.60.5_n1_RfFit0 <- train(
  as.factor(adult_df1) ~ .,
  data = tda.m_kde_adult_5.60.5.n1.vec,
  importance = TRUE,
  method = "rf",
  trControl = fitControl,
  tuneGrid = rfGrid,
  preProcess = c("center", "scale"),
  metric = "Accuracy"
)
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V7.Armed.Forces
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V7.Armed.Forces
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V7.Armed.Forces
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V7.Armed.Forces
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V7.Armed.Forces
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V7.Armed.Forces
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V7.Armed.Forces
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V7.Armed.Forces
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V7.Armed.Forces
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V7.Armed.Forces
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V7.Armed.Forces
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V7.Armed.Forces
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V7.Armed.Forces
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V7.Armed.Forces
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V7.Armed.Forces
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V7.Armed.Forces
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V7.Armed.Forces
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V7.Armed.Forces
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V7.Armed.Forces
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V7.Armed.Forces
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
# Print the tuned caret model: resampled Accuracy/Kappa per mtry value.
Adult_TDA_KDE_5.60.5_n1_RfFit0
## Random Forest
##
## 15260 samples
## 108 predictor
## 2 classes: ' <=50K', ' >50K'
##
## Pre-processing: centered (108), scaled (108)
## Resampling: Cross-Validated (3 fold)
## Summary of sample sizes: 10174, 10172, 10174
## Resampling results across tuning parameters:
##
## mtry Accuracy Kappa
## 50 0.8604187 0.6249923
## 100 0.8600255 0.6239360
## 150 0.8595669 0.6229970
## 200 0.8583217 0.6197994
## 250 0.8596978 0.6235054
## 300 0.8592393 0.6218488
## 350 0.8596323 0.6241861
## 400 0.8587150 0.6209807
## 450 0.8589115 0.6212022
## 500 0.8586493 0.6210787
## 550 0.8593703 0.6224559
## 600 0.8600908 0.6250479
## 650 0.8595013 0.6228378
## 700 0.8596322 0.6231049
## 750 0.8589117 0.6212448
## 800 0.8584530 0.6196371
## 850 0.8589771 0.6216434
## 900 0.8593047 0.6227621
## 950 0.8594359 0.6227783
## 1000 0.8593047 0.6219482
##
## Accuracy was used to select the optimal model using the largest value.
## The final value used for the model was mtry = 50.
# Per-fold results of the selected model.
Adult_TDA_KDE_5.60.5_n1_RfFit0[["resample"]]
## Accuracy Kappa Resample
## 1 0.8598112 0.6186843 Fold1
## 2 0.8554856 0.6179074 Fold3
## 3 0.8659591 0.6383852 Fold2
# Keep the first column (Accuracy) as a one-column data frame for the
# fold-wise comparison further down.
# NOTE(review): rows above are ordered Fold1, Fold3, Fold2; the later
# subtraction pairs rows by position -- confirm the other operand matches.
ad_tda_kde_5.60.5_n1_rf_fit0_re <-
  Adult_TDA_KDE_5.60.5_n1_RfFit0[["resample"]][1]
# Component-by-component summary of the fit (output follows).
summary(Adult_TDA_KDE_5.60.5_n1_RfFit0)
## Length Class Mode
## call 5 -none- call
## type 1 -none- character
## predicted 15260 factor numeric
## err.rate 1500 -none- numeric
## confusion 6 -none- numeric
## votes 30520 matrix numeric
## oob.times 15260 -none- numeric
## classes 2 -none- character
## importance 108 -none- numeric
## importanceSD 0 -none- NULL
## localImportance 0 -none- NULL
## proximity 0 -none- NULL
## ntree 1 -none- numeric
## mtry 1 -none- numeric
## forest 14 -none- list
## y 15260 factor numeric
## test 0 -none- NULL
## inbag 0 -none- NULL
## xNames 108 -none- character
## problemType 1 -none- character
## tuneValue 1 data.frame list
## obsLevels 2 -none- character
## param 1 -none- list
# Variable-importance plot of the 25 top-ranked features for the node-1 KDE
# model (title typo "Assited" corrected to "Assisted").
vip(Adult_TDA_KDE_5.60.5_n1_RfFit0, num_features = 25) +
  ggtitle("Adult_TDA_KDE_5.60.5_n1_RfFit TDA-Assisted RF")

# Score the test data (adult.one_hot_df4Test) with the node-1 KDE model
# that was fitted on the training data.
pred0 <- predict(
  Adult_TDA_KDE_5.60.5_n1_RfFit0,
  newdata = adult.one_hot_df4Test
)
# Confusion matrix of predictions against the test labels, used to assess
# model performance on the test data.
ad_tda_kde_5.60.5_n1_rf_cf0 <- confusionMatrix(
  data = pred0,
  reference = as.factor(adult.one_hot_df4Test$adult_df1)
)
ad_tda_kde_5.60.5_n1_rf_cf0
## Confusion Matrix and Statistics
##
## Reference
## Prediction <=50K >50K
## <=50K 7232 559
## >50K 184 1793
##
## Accuracy : 0.9239
## 95% CI : (0.9185, 0.9291)
## No Information Rate : 0.7592
## P-Value [Acc > NIR] : < 2.2e-16
##
## Kappa : 0.78
##
## Mcnemar's Test P-Value : < 2.2e-16
##
## Sensitivity : 0.9752
## Specificity : 0.7623
## Pos Pred Value : 0.9283
## Neg Pred Value : 0.9069
## Prevalence : 0.7592
## Detection Rate : 0.7404
## Detection Prevalence : 0.7976
## Balanced Accuracy : 0.8688
##
## 'Positive' Class : <=50K
##
# Prints the same confusion-matrix object a second time; the output below
# repeats what was printed right after the object was created.
ad_tda_kde_5.60.5_n1_rf_cf0
## Confusion Matrix and Statistics
##
## Reference
## Prediction <=50K >50K
## <=50K 7232 559
## >50K 184 1793
##
## Accuracy : 0.9239
## 95% CI : (0.9185, 0.9291)
## No Information Rate : 0.7592
## P-Value [Acc > NIR] : < 2.2e-16
##
## Kappa : 0.78
##
## Mcnemar's Test P-Value : < 2.2e-16
##
## Sensitivity : 0.9752
## Specificity : 0.7623
## Pos Pred Value : 0.9283
## Neg Pred Value : 0.9069
## Prevalence : 0.7592
## Detection Rate : 0.7404
## Detection Prevalence : 0.7976
## Balanced Accuracy : 0.8688
##
## 'Positive' Class : <=50K
##
# Overall statistics of the node-1 KDE confusion matrix.
ad_tda_kde_5.60.5_n1_rf_cf0[["overall"]]
## Accuracy Kappa AccuracyLower AccuracyUpper AccuracyNull
## 9.239353e-01 7.799778e-01 9.184994e-01 9.291167e-01 7.592138e-01
## AccuracyPValue McnemarPValue
## 0.000000e+00 7.629413e-43
# First entry of `overall` is Accuracy (see output above); keep it as a
# named length-1 value for the test-set comparison.
ad_tda_kde_5.60.5_n1_rf_cf0_ov_acc <-
  ad_tda_kde_5.60.5_n1_rf_cf0[["overall"]][1]
# Per-class statistics.
ad_tda_kde_5.60.5_n1_rf_cf0[["byClass"]]
## Sensitivity Specificity Pos Pred Value
## 0.9751888 0.7623299 0.9282505
## Neg Pred Value Precision Recall
## 0.9069297 0.9282505 0.9751888
## F1 Prevalence Detection Rate
## 0.9511409 0.7592138 0.7403767
## Detection Prevalence Balanced Accuracy
## 0.7976044 0.8687594
# Positions 5:7 of byClass are Precision, Recall and F1 (see output above).
ad_tda_kde_5.60.5_n1_rf_cf0_pre_rec_f1 <-
  ad_tda_kde_5.60.5_n1_rf_cf0[["byClass"]][5:7]
###### Bayesian tests: non-TDA-assisted RF vs. TDA-assisted RF (KDE, node 1)
### Differences over the 3 cross-validation folds
# Row-wise difference: ad_rf_fit_re minus the TDA-assisted resample accuracy,
# so a negative value means the TDA-assisted fit scored higher on that row.
# NOTE(review): rows are paired by position, and the KDE fit's resample table
# is ordered Fold1, Fold3, Fold2 -- confirm ad_rf_fit_re uses the same order.
diff_tda_kde_5.60.5_rf_n1_3_fold <-
  ad_rf_fit_re - ad_tda_kde_5.60.5_n1_rf_fit0_re
diff_tda_kde_5.60.5_rf_n1_3_fold
## Accuracy
## 1 0.003819134
## 2 0.002108457
## 3 -0.015472149
## Bayesian tests on the 3-fold differences
# Bayesian sign test, called with the bounds -0.01 and 0.01
bst_tda_kde_5.60.5_rf.n1_3_fold <- BayesianSignTest(
  as.matrix(diff_tda_kde_5.60.5_rf_n1_3_fold), -0.01, 0.01
)
bst_tda_kde_5.60.5_rf.n1_3_fold
## $probLeft
## [1] 0.25
##
## $probRope
## [1] 0.75
##
## $probRight
## [1] 0
# Odds of "left" against "right" from the sign test; this is Inf whenever
# probRight is 0, as in the recorded output.
bst_tda_kde_5.60.5_rf.n1_3_fold_odds.left <-
  bst_tda_kde_5.60.5_rf.n1_3_fold[["probLeft"]] /
  bst_tda_kde_5.60.5_rf.n1_3_fold[["probRight"]]
bst_tda_kde_5.60.5_rf.n1_3_fold_odds.left
## [1] Inf
# Bayesian signed-rank test, same bounds
bsr_tda_kde_5.60.5_rf.n1_3_fold <- BayesianSignedRank(
  as.matrix(diff_tda_kde_5.60.5_rf_n1_3_fold), -0.01, 0.01
)
bsr_tda_kde_5.60.5_rf.n1_3_fold
## $winLeft
## [1] 0.04716667
##
## $winRope
## [1] 0.9528333
##
## $winRight
## [1] 0
# Bayesian correlated t-test; 0.1 is passed ahead of the two bounds
bct_tda_kde_5.60.5_rf.n1_3_fold <- correlatedBayesianTtest(
  as.matrix(diff_tda_kde_5.60.5_rf_n1_3_fold), 0.1, -0.01, 0.01
)
bct_tda_kde_5.60.5_rf.n1_3_fold
## $left
## [1] 0.2196185
##
## $rope
## [1] 0.6777404
##
## $right
## [1] 0.1026411
# ROPE plot over the same [-0.01, 0.01] range
plot(rope(diff_tda_kde_5.60.5_rf_n1_3_fold, range = c(-0.01, 0.01)))

# BayesFactor (disabled)
#bf_tda_kde_5.60.5_rf.n1_3_fold = ttestBF(x = as.matrix(diff_tda_kde_5.60.5_rf_n1_3_fold))
#bf_tda_kde_5.60.5_rf.n1_3_fold
# Classical one-sample t-test on the fold differences
t.test(as.matrix(diff_tda_kde_5.60.5_rf_n1_3_fold))
##
## One Sample t-test
##
## data: as.matrix(diff_tda_kde_5.60.5_rf_n1_3_fold)
## t = -0.51605, df = 2, p-value = 0.6572
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
## -0.02970791 0.02334487
## sample estimates:
## mean of x
## -0.00318152
### Difference on the held-out test set
# Single accuracy difference: rf_cf_ov_acc minus the TDA-assisted (KDE,
# node 1) test accuracy. All tests below therefore run on one value only.
diff_tda_kde_5.60.5_rf.n1_test <-
  rf_cf_ov_acc - ad_tda_kde_5.60.5_n1_rf_cf0_ov_acc
diff_tda_kde_5.60.5_rf.n1_test
## Accuracy
## -0.06500819
## Bayesian tests on the test-set difference
# Bayesian sign test, called with the bounds -0.01 and 0.01
bst_tda_kde_5.60.5_rf.n1_test <- BayesianSignTest(
  as.matrix(diff_tda_kde_5.60.5_rf.n1_test), -0.01, 0.01
)
bst_tda_kde_5.60.5_rf.n1_test
## $probLeft
## [1] 0.5
##
## $probRope
## [1] 0.5
##
## $probRight
## [1] 0
# Odds of "left" against "right" from the sign test; this is Inf whenever
# probRight is 0, as in the recorded output.
bst_tda_kde_5.60.5_rf.n1_test_odds.left <-
  bst_tda_kde_5.60.5_rf.n1_test[["probLeft"]] /
  bst_tda_kde_5.60.5_rf.n1_test[["probRight"]]
bst_tda_kde_5.60.5_rf.n1_test_odds.left
## [1] Inf
# Bayesian signed-rank test, same bounds
bsr_tda_kde_5.60.5_rf.n1_test <- BayesianSignedRank(
  as.matrix(diff_tda_kde_5.60.5_rf.n1_test), -0.01, 0.01
)
bsr_tda_kde_5.60.5_rf.n1_test
## $winLeft
## [1] 0.8402333
##
## $winRope
## [1] 0.1597667
##
## $winRight
## [1] 0
# Bayesian correlated t-test; with this single difference the recorded
# result is NA for all three regions.
bct_tda_kde_5.60.5_rf.n1_test <- correlatedBayesianTtest(
  as.matrix(diff_tda_kde_5.60.5_rf.n1_test), 0.1, -0.01, 0.01
)
bct_tda_kde_5.60.5_rf.n1_test
## $left
## [1] NA
##
## $rope
## [1] NA
##
## $right
## [1] NA
# ROPE plot (disabled)
#plot(rope(diff_tda_kde_5.60.5_rf.n1_test))
# BayesFactor (disabled)
#bf_tda_kde_5.60.5_rf.n1_test = ttestBF(x = as.matrix(diff_tda_kde_5.60.5_rf.n1_test))
#bf_tda_kde_5.60.5_rf.n1_test
# t-test (disabled)
#t.test(as.matrix(diff_tda_kde_5.60.5_rf.n1_test))
## Node2
# Fit the TDA-assisted random forest for Node2 with caret::train().
#   - response: adult_df1 (converted to a factor); predictors: every other
#     column of tda.m_kde_adult_5.60.5.n2.vec
#   - predictors are centered and scaled before fitting
#   - resampling comes from fitControl, candidate mtry values from rfGrid
#   - the best mtry is chosen by resampled Accuracy
#
# `importance = TRUE` is forwarded through `...` to randomForest(). Argument
# names are case-sensitive: the earlier spelling `Importance` was not
# recognised, so the fitted forest carried no importanceSD (it shows as NULL
# in the recorded summary). Results recorded in this file predate the change.
#
# NOTE(review): the recorded warnings report "invalid mtry: reset to within
# valid range" and zero-variance predictors; rfGrid appears to contain mtry
# values far above the 108 predictors of this data set -- consider trimming
# the grid and adding "zv" to the pre-processing steps.
Adult_TDA_KDE_5.60.5_n2_RfFit0 <- train(
  as.factor(adult_df1) ~ .,
  data = tda.m_kde_adult_5.60.5.n2.vec,
  importance = TRUE,
  method = "rf",
  trControl = fitControl,
  tuneGrid = rfGrid,
  preProcess = c("center", "scale"),
  metric = "Accuracy"
)
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.1st.4th, V4.5th.6th, V4.7th.8th,
## V4.9th, V4.Doctorate, V4.Preschool
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.1st.4th, V4.5th.6th, V4.7th.8th,
## V4.9th, V4.Doctorate, V4.Preschool
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.1st.4th, V4.5th.6th, V4.7th.8th,
## V4.9th, V4.Doctorate, V4.Preschool
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.1st.4th, V4.5th.6th, V4.7th.8th,
## V4.9th, V4.Doctorate, V4.Preschool
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.1st.4th, V4.5th.6th, V4.7th.8th,
## V4.9th, V4.Doctorate, V4.Preschool
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.1st.4th, V4.5th.6th, V4.7th.8th,
## V4.9th, V4.Doctorate, V4.Preschool
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.1st.4th, V4.5th.6th, V4.7th.8th,
## V4.9th, V4.Doctorate, V4.Preschool
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.1st.4th, V4.5th.6th, V4.7th.8th,
## V4.9th, V4.Doctorate, V4.Preschool
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.1st.4th, V4.5th.6th, V4.7th.8th,
## V4.9th, V4.Doctorate, V4.Preschool
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.1st.4th, V4.5th.6th, V4.7th.8th,
## V4.9th, V4.Doctorate, V4.Preschool
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.1st.4th, V4.5th.6th, V4.7th.8th,
## V4.9th, V4.Doctorate, V4.Preschool
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.1st.4th, V4.5th.6th, V4.7th.8th,
## V4.9th, V4.Doctorate, V4.Preschool
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.1st.4th, V4.5th.6th, V4.7th.8th,
## V4.9th, V4.Doctorate, V4.Preschool
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.1st.4th, V4.5th.6th, V4.7th.8th,
## V4.9th, V4.Doctorate, V4.Preschool
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.1st.4th, V4.5th.6th, V4.7th.8th,
## V4.9th, V4.Doctorate, V4.Preschool
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.1st.4th, V4.5th.6th, V4.7th.8th,
## V4.9th, V4.Doctorate, V4.Preschool
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.1st.4th, V4.5th.6th, V4.7th.8th,
## V4.9th, V4.Doctorate, V4.Preschool
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.1st.4th, V4.5th.6th, V4.7th.8th,
## V4.9th, V4.Doctorate, V4.Preschool
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.1st.4th, V4.5th.6th, V4.7th.8th,
## V4.9th, V4.Doctorate, V4.Preschool
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.1st.4th, V4.5th.6th, V4.7th.8th,
## V4.9th, V4.Doctorate, V4.Preschool
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.1st.4th, V4.5th.6th, V4.7th.8th,
## V4.9th, V4.Doctorate, V4.Preschool, V7.Armed.Forces, V14.Holand.Netherlands
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.1st.4th, V4.5th.6th, V4.7th.8th,
## V4.9th, V4.Doctorate, V4.Preschool, V7.Armed.Forces, V14.Holand.Netherlands
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.1st.4th, V4.5th.6th, V4.7th.8th,
## V4.9th, V4.Doctorate, V4.Preschool, V7.Armed.Forces, V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.1st.4th, V4.5th.6th, V4.7th.8th,
## V4.9th, V4.Doctorate, V4.Preschool, V7.Armed.Forces, V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.1st.4th, V4.5th.6th, V4.7th.8th,
## V4.9th, V4.Doctorate, V4.Preschool, V7.Armed.Forces, V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.1st.4th, V4.5th.6th, V4.7th.8th,
## V4.9th, V4.Doctorate, V4.Preschool, V7.Armed.Forces, V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.1st.4th, V4.5th.6th, V4.7th.8th,
## V4.9th, V4.Doctorate, V4.Preschool, V7.Armed.Forces, V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.1st.4th, V4.5th.6th, V4.7th.8th,
## V4.9th, V4.Doctorate, V4.Preschool, V7.Armed.Forces, V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.1st.4th, V4.5th.6th, V4.7th.8th,
## V4.9th, V4.Doctorate, V4.Preschool, V7.Armed.Forces, V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.1st.4th, V4.5th.6th, V4.7th.8th,
## V4.9th, V4.Doctorate, V4.Preschool, V7.Armed.Forces, V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.1st.4th, V4.5th.6th, V4.7th.8th,
## V4.9th, V4.Doctorate, V4.Preschool, V7.Armed.Forces, V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.1st.4th, V4.5th.6th, V4.7th.8th,
## V4.9th, V4.Doctorate, V4.Preschool, V7.Armed.Forces, V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.1st.4th, V4.5th.6th, V4.7th.8th,
## V4.9th, V4.Doctorate, V4.Preschool, V7.Armed.Forces, V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.1st.4th, V4.5th.6th, V4.7th.8th,
## V4.9th, V4.Doctorate, V4.Preschool, V7.Armed.Forces, V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.1st.4th, V4.5th.6th, V4.7th.8th,
## V4.9th, V4.Doctorate, V4.Preschool, V7.Armed.Forces, V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.1st.4th, V4.5th.6th, V4.7th.8th,
## V4.9th, V4.Doctorate, V4.Preschool, V7.Armed.Forces, V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.1st.4th, V4.5th.6th, V4.7th.8th,
## V4.9th, V4.Doctorate, V4.Preschool, V7.Armed.Forces, V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.1st.4th, V4.5th.6th, V4.7th.8th,
## V4.9th, V4.Doctorate, V4.Preschool, V7.Armed.Forces, V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.1st.4th, V4.5th.6th, V4.7th.8th,
## V4.9th, V4.Doctorate, V4.Preschool, V7.Armed.Forces, V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.1st.4th, V4.5th.6th, V4.7th.8th,
## V4.9th, V4.Doctorate, V4.Preschool, V7.Armed.Forces, V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.1st.4th, V4.5th.6th, V4.7th.8th,
## V4.9th, V4.Doctorate, V4.Preschool, V14.Honduras
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.1st.4th, V4.5th.6th, V4.7th.8th,
## V4.9th, V4.Doctorate, V4.Preschool, V14.Honduras
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.1st.4th, V4.5th.6th, V4.7th.8th,
## V4.9th, V4.Doctorate, V4.Preschool, V14.Honduras
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.1st.4th, V4.5th.6th, V4.7th.8th,
## V4.9th, V4.Doctorate, V4.Preschool, V14.Honduras
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.1st.4th, V4.5th.6th, V4.7th.8th,
## V4.9th, V4.Doctorate, V4.Preschool, V14.Honduras
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.1st.4th, V4.5th.6th, V4.7th.8th,
## V4.9th, V4.Doctorate, V4.Preschool, V14.Honduras
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.1st.4th, V4.5th.6th, V4.7th.8th,
## V4.9th, V4.Doctorate, V4.Preschool, V14.Honduras
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.1st.4th, V4.5th.6th, V4.7th.8th,
## V4.9th, V4.Doctorate, V4.Preschool, V14.Honduras
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.1st.4th, V4.5th.6th, V4.7th.8th,
## V4.9th, V4.Doctorate, V4.Preschool, V14.Honduras
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.1st.4th, V4.5th.6th, V4.7th.8th,
## V4.9th, V4.Doctorate, V4.Preschool, V14.Honduras
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.1st.4th, V4.5th.6th, V4.7th.8th,
## V4.9th, V4.Doctorate, V4.Preschool, V14.Honduras
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.1st.4th, V4.5th.6th, V4.7th.8th,
## V4.9th, V4.Doctorate, V4.Preschool, V14.Honduras
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.1st.4th, V4.5th.6th, V4.7th.8th,
## V4.9th, V4.Doctorate, V4.Preschool, V14.Honduras
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.1st.4th, V4.5th.6th, V4.7th.8th,
## V4.9th, V4.Doctorate, V4.Preschool, V14.Honduras
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.1st.4th, V4.5th.6th, V4.7th.8th,
## V4.9th, V4.Doctorate, V4.Preschool, V14.Honduras
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.1st.4th, V4.5th.6th, V4.7th.8th,
## V4.9th, V4.Doctorate, V4.Preschool, V14.Honduras
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.1st.4th, V4.5th.6th, V4.7th.8th,
## V4.9th, V4.Doctorate, V4.Preschool, V14.Honduras
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.1st.4th, V4.5th.6th, V4.7th.8th,
## V4.9th, V4.Doctorate, V4.Preschool, V14.Honduras
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.1st.4th, V4.5th.6th, V4.7th.8th,
## V4.9th, V4.Doctorate, V4.Preschool, V14.Honduras
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.1st.4th, V4.5th.6th, V4.7th.8th,
## V4.9th, V4.Doctorate, V4.Preschool, V14.Honduras
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.1st.4th, V4.5th.6th, V4.7th.8th,
## V4.9th, V4.Doctorate, V4.Preschool
# Print the Node2 fit: resampling set-up, accuracy per mtry, and the chosen mtry
Adult_TDA_KDE_5.60.5_n2_RfFit0
## Random Forest
##
## 14482 samples
## 108 predictor
## 2 classes: ' <=50K', ' >50K'
##
## Pre-processing: centered (108), scaled (108)
## Resampling: Cross-Validated (3 fold)
## Summary of sample sizes: 9654, 9655, 9655
## Resampling results across tuning parameters:
##
## mtry Accuracy Kappa
## 50 0.8442202 0.6010694
## 100 0.8422869 0.5986739
## 150 0.8422180 0.5980398
## 200 0.8408369 0.5946913
## 250 0.8403535 0.5931887
## 300 0.8418727 0.5974886
## 350 0.8418726 0.5973079
## 400 0.8429085 0.6003993
## 450 0.8418727 0.5977362
## 500 0.8411132 0.5960861
## 550 0.8424251 0.5984086
## 600 0.8406987 0.5942363
## 650 0.8429084 0.5995989
## 700 0.8417345 0.5967635
## 750 0.8410440 0.5954497
## 800 0.8425633 0.5991881
## 850 0.8412512 0.5961252
## 900 0.8417346 0.5971376
## 950 0.8404226 0.5935948
## 1000 0.8408370 0.5945127
##
## Accuracy was used to select the optimal model using the largest value.
## The final value used for the model was mtry = 50.
# Per-fold accuracy and kappa of the selected Node2 model
Adult_TDA_KDE_5.60.5_n2_RfFit0[["resample"]]
## Accuracy Kappa Resample
## 1 0.8475559 0.6104136 Fold1
## 2 0.8388233 0.5879605 Fold3
## 3 0.8462813 0.6048341 Fold2
# Keep only the Accuracy column (still a one-column data frame) for the
# fold-wise comparison against the baseline forest.
# NOTE(review): the rows above are not in fold order (Fold1, Fold3, Fold2);
# confirm the object this is later subtracted from uses the same row order.
ad_tda_KDE_5.60.5_n2_rf_fit0_re <-
  Adult_TDA_KDE_5.60.5_n2_RfFit0[["resample"]]["Accuracy"]
# Component-by-component overview of the fitted object
summary(
  Adult_TDA_KDE_5.60.5_n2_RfFit0
)
## Length Class Mode
## call 5 -none- call
## type 1 -none- character
## predicted 14482 factor numeric
## err.rate 1500 -none- numeric
## confusion 6 -none- numeric
## votes 28964 matrix numeric
## oob.times 14482 -none- numeric
## classes 2 -none- character
## importance 108 -none- numeric
## importanceSD 0 -none- NULL
## localImportance 0 -none- NULL
## proximity 0 -none- NULL
## ntree 1 -none- numeric
## mtry 1 -none- numeric
## forest 14 -none- list
## y 14482 factor numeric
## test 0 -none- NULL
## inbag 0 -none- NULL
## xNames 108 -none- character
## problemType 1 -none- character
## tuneValue 1 data.frame list
## obsLevels 2 -none- character
## param 1 -none- list
# Variable-importance plot for the Node2 fit, limited to 25 features.
# The plot title previously misspelled "Assisted" as "Assited".
vip(Adult_TDA_KDE_5.60.5_n2_RfFit0, num_features = 25) +
  ggtitle("Adult_TDA_KDE_5.60.5_n2_RfFit TDA-Assisted RF")

# Score the test rows in adult.one_hot_df4Test with the Node2 forest
pred0 <- predict(
  Adult_TDA_KDE_5.60.5_n2_RfFit0,
  newdata = adult.one_hot_df4Test
)
# Cross-tabulate the predictions against the observed adult_df1 labels to
# assess performance on the test data.
# NOTE(review): the recorded test accuracy (0.9266) is well above the
# cross-validated accuracy (about 0.844) -- worth confirming that the test
# rows do not overlap the training data.
ad_tda_kde_5.60.5_n2_rf_cf0 <- confusionMatrix(
  data = pred0,
  reference = as.factor(adult.one_hot_df4Test$adult_df1)
)
ad_tda_kde_5.60.5_n2_rf_cf0
## Confusion Matrix and Statistics
##
## Reference
## Prediction <=50K >50K
## <=50K 7197 498
## >50K 219 1854
##
## Accuracy : 0.9266
## 95% CI : (0.9212, 0.9317)
## No Information Rate : 0.7592
## P-Value [Acc > NIR] : < 2.2e-16
##
## Kappa : 0.7908
##
## Mcnemar's Test P-Value : < 2.2e-16
##
## Sensitivity : 0.9705
## Specificity : 0.7883
## Pos Pred Value : 0.9353
## Neg Pred Value : 0.8944
## Prevalence : 0.7592
## Detection Rate : 0.7368
## Detection Prevalence : 0.7878
## Balanced Accuracy : 0.8794
##
## 'Positive' Class : <=50K
##
# Overall statistics of the Node2 test-set confusion matrix.
# (The matrix itself is printed where it is created, so the redundant second
# print that used to sit here has been dropped.)
ad_tda_kde_5.60.5_n2_rf_cf0$overall
## Accuracy Kappa AccuracyLower AccuracyUpper AccuracyNull
## 9.265971e-01 7.907608e-01 9.212457e-01 9.316923e-01 7.592138e-01
## AccuracyPValue McnemarPValue
## 0.000000e+00 2.991211e-25
# Keep the test-set accuracy; selected by name instead of position so the
# extraction does not depend on the ordering of the statistics.
ad_tda_kde_5.60.5_n2_rf_cf0_ov_acc <-
  ad_tda_kde_5.60.5_n2_rf_cf0$overall["Accuracy"]
# Per-class statistics of the same confusion matrix
ad_tda_kde_5.60.5_n2_rf_cf0$byClass
## Sensitivity Specificity Pos Pred Value
## 0.9704693 0.7882653 0.9352827
## Neg Pred Value Precision Recall
## 0.8943560 0.9352827 0.9704693
## F1 Prevalence Detection Rate
## 0.9525511 0.7592138 0.7367936
## Detection Prevalence Balanced Accuracy
## 0.7877764 0.8793673
# Keep precision, recall and F1 (formerly positions 5 to 7), selected by name
ad_tda_kde_5.60.5_n2_rf_cf0_pre_rec_f1 <-
  ad_tda_kde_5.60.5_n2_rf_cf0$byClass[c("Precision", "Recall", "F1")]
###### Conduct initial Bayesian tests of non-tda-assisted RF vs. tda-assisted RF classifiers
### 3-fold diff
# Row-by-row difference of the per-fold accuracies: ad_rf_fit_re minus the
# Node2 TDA-assisted values.
# NOTE(review): the subtraction pairs rows by position; the Node2 resample
# table lists its folds as Fold1, Fold3, Fold2 -- confirm ad_rf_fit_re has the
# same row order and that both fits used the same fold assignment.
diff_tda_kde_5.60.5_rf_n2_3_fold <-
  ad_rf_fit_re - ad_tda_KDE_5.60.5_n2_rf_fit0_re
diff_tda_kde_5.60.5_rf_n2_3_fold
## Accuracy
## 1 0.016074457
## 2 0.018770818
## 3 0.004205636
## Bayesian Tests 3-fold diff
# Bayesian Sign Test (ROPE bounds -0.01 and 0.01)
bst_tda_kde_5.60.5_rf.n2_3_fold <- BayesianSignTest(
  as.matrix(diff_tda_kde_5.60.5_rf_n2_3_fold),
  -0.01,
  0.01
)
bst_tda_kde_5.60.5_rf.n2_3_fold
## $probLeft
## [1] 0
##
## $probRope
## [1] 0.5
##
## $probRight
## [1] 0.5
# Odds Left Bayesian Sign Test: probLeft divided by probRight
bst_tda_kde_5.60.5_rf.n2_3_fold_odds.left <-
  bst_tda_kde_5.60.5_rf.n2_3_fold[["probLeft"]] /
  bst_tda_kde_5.60.5_rf.n2_3_fold[["probRight"]]
bst_tda_kde_5.60.5_rf.n2_3_fold_odds.left
## [1] 0
# Bayesian Signed Rank Test (ROPE bounds -0.01 and 0.01)
bsr_tda_kde_5.60.5_rf.n2_3_fold <- BayesianSignedRank(
  as.matrix(diff_tda_kde_5.60.5_rf_n2_3_fold),
  -0.01,
  0.01
)
bsr_tda_kde_5.60.5_rf.n2_3_fold
## $winLeft
## [1] 0
##
## $winRope
## [1] 0.3032333
##
## $winRight
## [1] 0.6967667
# Bayesian Correlated Test on the Node2 per-fold accuracy differences, with
# -0.01 and 0.01 as the ROPE bounds.
# NOTE(review): the second argument (0.1) appears to be the correlation (rho)
# of the correlated t-test; for k-fold CV it is usually set to 1/k, i.e. 1/3
# for the 3 folds used here -- confirm against correlatedBayesianTtest().
bct_tda_kde_5.60.5_rf.n2_3_fold <- correlatedBayesianTtest(
  as.matrix(diff_tda_kde_5.60.5_rf_n2_3_fold),
  0.1,
  -0.01,
  0.01
)
bct_tda_kde_5.60.5_rf.n2_3_fold
## $left
## [1] 0.02343112
##
## $rope
## [1] 0.2857245
##
## $right
## [1] 0.6908443
# ROPE plot of the same differences over the interval [-0.01, 0.01]
plot(
  rope(diff_tda_kde_5.60.5_rf_n2_3_fold, range = c(-0.01, 0.01))
)

# BayesFactor (disabled)
#bf_tda_kde_5.60.5_rf.n2_3_fold = ttestBF(x = as.matrix(diff_tda_kde_5.60.5_rf_n2_3_fold))
#bf_tda_kde_5.60.5_rf.n2_3_fold
# One-sample t-test of the fold differences against a true mean of 0
t.test(
  as.matrix(diff_tda_kde_5.60.5_rf_n2_3_fold)
)
##
## One Sample t-test
##
## data: as.matrix(diff_tda_kde_5.60.5_rf_n2_3_fold)
## t = 2.9095, df = 2, p-value = 0.1006
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
## -0.006232661 0.032266601
## sample estimates:
## mean of x
## 0.01301697
### Test set diff
# Test-set accuracy difference: rf_cf_ov_acc minus the Node2 TDA-assisted value
diff_tda_kde_5.60.5_rf.n2_test <-
  rf_cf_ov_acc - ad_tda_kde_5.60.5_n2_rf_cf0_ov_acc
diff_tda_kde_5.60.5_rf.n2_test
## Accuracy
## -0.06766994
## Bayesian Tests Test set diff
# NOTE(review): the difference above is a single number, so every test below
# receives exactly one observation -- confirm this is intended.
# Bayesian Sign Test (ROPE bounds -0.01 and 0.01)
bst_tda_kde_5.60.5_rf.n2_test <- BayesianSignTest(
  as.matrix(diff_tda_kde_5.60.5_rf.n2_test),
  -0.01,
  0.01
)
bst_tda_kde_5.60.5_rf.n2_test
## $probLeft
## [1] 0.5
##
## $probRope
## [1] 0.5
##
## $probRight
## [1] 0
# Odds Left Bayesian Sign Test: probLeft divided by probRight.
# The ratio is Inf whenever probRight is 0, as in the result recorded below.
bst_tda_kde_5.60.5_rf.n2_test_odds.left <-
  bst_tda_kde_5.60.5_rf.n2_test[["probLeft"]] /
  bst_tda_kde_5.60.5_rf.n2_test[["probRight"]]
bst_tda_kde_5.60.5_rf.n2_test_odds.left
## [1] Inf
# Bayesian Signed Rank Test (ROPE bounds -0.01 and 0.01)
bsr_tda_kde_5.60.5_rf.n2_test <- BayesianSignedRank(
  as.matrix(diff_tda_kde_5.60.5_rf.n2_test),
  -0.01,
  0.01
)
bsr_tda_kde_5.60.5_rf.n2_test
## $winLeft
## [1] 0.8399
##
## $winRope
## [1] 0.1601
##
## $winRight
## [1] 0
# Bayesian Correlated Test; with the single difference supplied here all three
# regions come back as NA (see the recorded result).
bct_tda_kde_5.60.5_rf.n2_test <- correlatedBayesianTtest(
  as.matrix(diff_tda_kde_5.60.5_rf.n2_test),
  0.1,
  -0.01,
  0.01
)
bct_tda_kde_5.60.5_rf.n2_test
## $left
## [1] NA
##
## $rope
## [1] NA
##
## $right
## [1] NA
# Rope Plot (disabled)
#plot(rope(diff_tda_kde_5.60.5_rf.n2_test))
# BayesFactor (disabled)
#bf_tda_kde_5.60.5_rf.n2_test = ttestBF(x = as.matrix(diff_tda_kde_5.60.5_rf.n2_test)) #bf_tda_kde_5.60.5_rf.n2_test
# t-test (disabled)
#t.test(as.matrix(diff_tda_kde_5.60.5_rf.n2_test))
## Node3
# Random forest on the node-3 TDA/KDE training subset, tuned over rfGrid with
# the resampling scheme in fitControl (both defined earlier in the file).
#
# `importance = TRUE` is forwarded by train() to randomForest(). R argument
# names are case-sensitive: the earlier spelling `Importance` did not match
# randomForest's `importance` argument and was ignored (the recorded summary
# of the final model shows importanceSD as NULL).
#
# NOTE(review): the recorded warnings show randomForest resetting mtry values
# from rfGrid that exceed the number of predictors (108 in the recorded fit),
# so most grid rows fit the same model; consider capping rfGrid.
# NOTE(review): preProcess also warns about zero-variance dummy columns in
# this subset; consider dropping them before centering and scaling.
Adult_TDA_KDE_5.60.5_n3_RfFit0 <- train(
  as.factor(adult_df1) ~ .,
  data = tda.m_kde_adult_5.60.5.n3.vec,
  importance = TRUE,
  method = "rf",
  trControl = fitControl,
  tuneGrid = rfGrid,
  preProcess = c("center", "scale"),
  metric = "Accuracy"
)
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.1st.4th, V4.5th.6th,
## V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.1st.4th, V4.5th.6th,
## V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.1st.4th, V4.5th.6th,
## V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.1st.4th, V4.5th.6th,
## V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.1st.4th, V4.5th.6th,
## V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.1st.4th, V4.5th.6th,
## V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.1st.4th, V4.5th.6th,
## V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.1st.4th, V4.5th.6th,
## V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.1st.4th, V4.5th.6th,
## V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.1st.4th, V4.5th.6th,
## V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.1st.4th, V4.5th.6th,
## V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.1st.4th, V4.5th.6th,
## V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.1st.4th, V4.5th.6th,
## V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.1st.4th, V4.5th.6th,
## V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.1st.4th, V4.5th.6th,
## V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.1st.4th, V4.5th.6th,
## V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.1st.4th, V4.5th.6th,
## V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.1st.4th, V4.5th.6th,
## V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.1st.4th, V4.5th.6th,
## V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.1st.4th, V4.5th.6th,
## V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.1st.4th, V4.5th.6th,
## V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.1st.4th, V4.5th.6th,
## V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.1st.4th, V4.5th.6th,
## V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.1st.4th, V4.5th.6th,
## V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.1st.4th, V4.5th.6th,
## V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.1st.4th, V4.5th.6th,
## V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.1st.4th, V4.5th.6th,
## V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.1st.4th, V4.5th.6th,
## V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.1st.4th, V4.5th.6th,
## V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.1st.4th, V4.5th.6th,
## V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.1st.4th, V4.5th.6th,
## V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.1st.4th, V4.5th.6th,
## V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.1st.4th, V4.5th.6th,
## V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.1st.4th, V4.5th.6th,
## V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.1st.4th, V4.5th.6th,
## V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.1st.4th, V4.5th.6th,
## V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.1st.4th, V4.5th.6th,
## V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.1st.4th, V4.5th.6th,
## V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.1st.4th, V4.5th.6th,
## V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.1st.4th, V4.5th.6th,
## V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.10th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool,
## V4.Prof.school
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.10th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool,
## V4.Prof.school
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.10th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool,
## V4.Prof.school
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.10th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool,
## V4.Prof.school
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.10th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool,
## V4.Prof.school
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.10th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool,
## V4.Prof.school
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.10th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool,
## V4.Prof.school
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.10th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool,
## V4.Prof.school
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.10th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool,
## V4.Prof.school
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.10th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool,
## V4.Prof.school
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.10th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool,
## V4.Prof.school
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.10th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool,
## V4.Prof.school
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.10th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool,
## V4.Prof.school
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.10th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool,
## V4.Prof.school
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.10th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool,
## V4.Prof.school
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.10th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool,
## V4.Prof.school
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.10th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool,
## V4.Prof.school
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.10th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool,
## V4.Prof.school
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.10th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool,
## V4.Prof.school
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.10th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool,
## V4.Prof.school
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.1st.4th, V4.5th.6th,
## V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school
# Print the tuning summary of the node-3 fit (accuracy and kappa per mtry
# value, plus the selected mtry).
Adult_TDA_KDE_5.60.5_n3_RfFit0
## Random Forest
##
## 13266 samples
## 108 predictor
## 2 classes: ' <=50K', ' >50K'
##
## Pre-processing: centered (108), scaled (108)
## Resampling: Cross-Validated (3 fold)
## Summary of sample sizes: 8844, 8844, 8844
## Resampling results across tuning parameters:
##
## mtry Accuracy Kappa
## 50 0.8365747 0.5678172
## 100 0.8328057 0.5585570
## 150 0.8352932 0.5650890
## 200 0.8343133 0.5624650
## 250 0.8316750 0.5552553
## 300 0.8328810 0.5588771
## 350 0.8331826 0.5590260
## 400 0.8343133 0.5625154
## 450 0.8317503 0.5555991
## 500 0.8322026 0.5566543
## 550 0.8344640 0.5627715
## 600 0.8323534 0.5574370
## 650 0.8319765 0.5556011
## 700 0.8337102 0.5614182
## 750 0.8326549 0.5583142
## 800 0.8340871 0.5619008
## 850 0.8339364 0.5615830
## 900 0.8331072 0.5591186
## 950 0.8340118 0.5623831
## 1000 0.8328057 0.5586455
##
## Accuracy was used to select the optimal model using the largest value.
## The final value used for the model was mtry = 50.
# Per-fold accuracy and kappa of the selected node-3 model.
Adult_TDA_KDE_5.60.5_n3_RfFit0[["resample"]]
## Accuracy Kappa Resample
## 1 0.8353686 0.5651862 Fold1
## 2 0.8371777 0.5704360 Fold3
## 3 0.8371777 0.5678294 Fold2
# Keep only the first column (Accuracy) as a one-column data frame.
# NOTE(review): the rows above are printed in the order Fold1, Fold3, Fold2
# and the fold labels are dropped here; confirm that the baseline vector this
# is later subtracted from uses the same row order.
ad_tda_kde_5.60.5_n3_rf_fit0_re <-
  Adult_TDA_KDE_5.60.5_n3_RfFit0[["resample"]][1]
# Component overview of the fitted final model.
summary(
  Adult_TDA_KDE_5.60.5_n3_RfFit0
)
## Length Class Mode
## call 5 -none- call
## type 1 -none- character
## predicted 13266 factor numeric
## err.rate 1500 -none- numeric
## confusion 6 -none- numeric
## votes 26532 matrix numeric
## oob.times 13266 -none- numeric
## classes 2 -none- character
## importance 108 -none- numeric
## importanceSD 0 -none- NULL
## localImportance 0 -none- NULL
## proximity 0 -none- NULL
## ntree 1 -none- numeric
## mtry 1 -none- numeric
## forest 14 -none- list
## y 13266 factor numeric
## test 0 -none- NULL
## inbag 0 -none- NULL
## xNames 108 -none- character
## problemType 1 -none- character
## tuneValue 1 data.frame list
## obsLevels 2 -none- character
## param 1 -none- list
# Variable-importance plot of the node-3 fit, limited to 25 features.
# The title typo "Assited" is corrected to "Assisted".
vip(Adult_TDA_KDE_5.60.5_n3_RfFit0, num_features = 25) +
  ggtitle("Adult_TDA_KDE_5.60.5_n3_RfFit TDA-Assisted RF")

# Predict the outcome for the held-out test data with the node-3 fit.
pred0 <- predict(
  Adult_TDA_KDE_5.60.5_n3_RfFit0,
  newdata = adult.one_hot_df4Test
)
# Confusion matrix of the predictions against the test labels.
# NOTE(review): the recorded test accuracy (0.9167) is well above the recorded
# cross-validated accuracy (about 0.837); confirm that adult.one_hot_df4Test
# does not overlap with the rows used for training.
ad_tda_kde_5.60.5_n3_rf_cf0 <- confusionMatrix(
  data = pred0,
  reference = as.factor(adult.one_hot_df4Test$adult_df1)
)
ad_tda_kde_5.60.5_n3_rf_cf0
## Confusion Matrix and Statistics
##
## Reference
## Prediction <=50K >50K
## <=50K 7141 539
## >50K 275 1813
##
## Accuracy : 0.9167
## 95% CI : (0.911, 0.9221)
## No Information Rate : 0.7592
## P-Value [Acc > NIR] : < 2.2e-16
##
## Kappa : 0.763
##
## Mcnemar's Test P-Value : < 2.2e-16
##
## Sensitivity : 0.9629
## Specificity : 0.7708
## Pos Pred Value : 0.9298
## Neg Pred Value : 0.8683
## Prevalence : 0.7592
## Detection Rate : 0.7311
## Detection Prevalence : 0.7862
## Balanced Accuracy : 0.8669
##
## 'Positive' Class : <=50K
##
# NOTE(review): second print of the same confusion matrix; the object has not
# been reassigned since it was printed right after its creation.
ad_tda_kde_5.60.5_n3_rf_cf0
## Confusion Matrix and Statistics
##
## Reference
## Prediction <=50K >50K
## <=50K 7141 539
## >50K 275 1813
##
## Accuracy : 0.9167
## 95% CI : (0.911, 0.9221)
## No Information Rate : 0.7592
## P-Value [Acc > NIR] : < 2.2e-16
##
## Kappa : 0.763
##
## Mcnemar's Test P-Value : < 2.2e-16
##
## Sensitivity : 0.9629
## Specificity : 0.7708
## Pos Pred Value : 0.9298
## Neg Pred Value : 0.8683
## Prevalence : 0.7592
## Detection Rate : 0.7311
## Detection Prevalence : 0.7862
## Balanced Accuracy : 0.8669
##
## 'Positive' Class : <=50K
##
# Overall test-set statistics of the node-3 confusion matrix.
ad_tda_kde_5.60.5_n3_rf_cf0[["overall"]]
## Accuracy Kappa AccuracyLower AccuracyUpper AccuracyNull
## 9.166667e-01 7.629915e-01 9.110094e-01 9.220740e-01 7.592138e-01
## AccuracyPValue McnemarPValue
## 0.000000e+00 3.022836e-20
# Keep the first entry (named Accuracy in the printout above).
ad_tda_kde_5.60.5_n3_rf_cf0_ov_acc <-
  ad_tda_kde_5.60.5_n3_rf_cf0[["overall"]][1]
# Per-class statistics of the same confusion matrix.
ad_tda_kde_5.60.5_n3_rf_cf0[["byClass"]]
## Sensitivity Specificity Pos Pred Value
## 0.9629180 0.7708333 0.9298177
## Neg Pred Value Precision Recall
## 0.8682950 0.9298177 0.9629180
## F1 Prevalence Detection Rate
## 0.9460784 0.7592138 0.7310606
## Detection Prevalence Balanced Accuracy
## 0.7862408 0.8668757
# Entries 5 to 7 of byClass (Precision, Recall and F1 in the recorded
# byClass printout).
ad_tda_kde_5.60.5_n3_rf_cf0_pre_rec_f1 <-
  ad_tda_kde_5.60.5_n3_rf_cf0[["byClass"]][5:7]
###### Conduct initial Bayesian tests of non-tda-assisted RF vs. tda-assisted RF classifiers
### 3-fold diff
# Per-fold accuracy of ad_rf_fit_re minus that of the node-3 TDA-assisted fit.
# NOTE(review): the two one-column data frames are subtracted row by row;
# confirm both list the folds in the same order.
diff_tda_kde_5.60.5_rf_n3_3_fold <-
  ad_rf_fit_re - ad_tda_kde_5.60.5_n3_rf_fit0_re
diff_tda_kde_5.60.5_rf_n3_3_fold
## Accuracy
## 1 0.02826177
## 2 0.02041636
## 3 0.01330922
## Bayesian Tests 3-fold diff
# Bayesian sign test with the bounds -0.01 and 0.01.
bst_tda_kde_5.60.5_rf.n3_3_fold <- BayesianSignTest(
  as.matrix(diff_tda_kde_5.60.5_rf_n3_3_fold),
  -0.01,
  0.01
)
bst_tda_kde_5.60.5_rf.n3_3_fold
## $probLeft
## [1] 0
##
## $probRope
## [1] 0.25
##
## $probRight
## [1] 0.75
# Odds of "left" versus "right" from the Bayesian sign test.
bst_tda_kde_5.60.5_rf.n3_3_fold_odds.left <-
  bst_tda_kde_5.60.5_rf.n3_3_fold[["probLeft"]] /
  bst_tda_kde_5.60.5_rf.n3_3_fold[["probRight"]]
bst_tda_kde_5.60.5_rf.n3_3_fold_odds.left
## [1] 0
# Bayesian signed-rank test on the same 3-fold differences.
bsr_tda_kde_5.60.5_rf.n3_3_fold <- BayesianSignedRank(
  as.matrix(diff_tda_kde_5.60.5_rf_n3_3_fold),
  -0.01,
  0.01
)
bsr_tda_kde_5.60.5_rf.n3_3_fold
## $winLeft
## [1] 0
##
## $winRope
## [1] 0.03723333
##
## $winRight
## [1] 0.9627667
# Correlated Bayesian t-test on the node-3 3-fold accuracy differences.
# Positional arguments after the data: 0.1, -0.01, 0.01 (presumably the fold
# correlation and the ROPE bounds; confirm against the helper's definition).
bct_tda_kde_5.60.5_rf.n3_3_fold <- correlatedBayesianTtest(
  as.matrix(diff_tda_kde_5.60.5_rf_n3_3_fold),
  0.1,
  -0.01,
  0.01
)
bct_tda_kde_5.60.5_rf.n3_3_fold
## $left
## [1] 0.01271962
##
## $rope
## [1] 0.07023381
##
## $right
## [1] 0.9170466
# ROPE plot for the node-3 3-fold differences, using the interval
# c(-0.01, 0.01) as the region of practical equivalence.
plot(
  rope(
    diff_tda_kde_5.60.5_rf_n3_3_fold,
    c(-0.01, 0.01)
  )
)

# Bayes factor (disabled):
# bf_tda_kde_5.60.5_rf.n3_3_fold = ttestBF(x = as.matrix(diff_tda_kde_5.60.5_rf_n3_3_fold))
# bf_tda_kde_5.60.5_rf.n3_3_fold

# Frequentist one-sample t-test on the same differences.
t.test(
  as.matrix(diff_tda_kde_5.60.5_rf_n3_3_fold)
)
##
## One Sample t-test
##
## data: as.matrix(diff_tda_kde_5.60.5_rf_n3_3_fold)
## t = 4.785, df = 2, p-value = 0.04101
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
## 0.002082813 0.039242086
## sample estimates:
## mean of x
## 0.02066245
### Test set diff
# rf_cf_ov_acc minus the node-3 TDA-assisted test accuracy (rf_cf_ov_acc is
# defined earlier in the file; presumably the baseline RF test accuracy).
diff_tda_kde_5.60.5_rf.n3_test <-
  rf_cf_ov_acc - ad_tda_kde_5.60.5_n3_rf_cf0_ov_acc
diff_tda_kde_5.60.5_rf.n3_test
## Accuracy
## -0.05773956
## Bayesian Tests Test set diff
# NOTE(review): the difference above is a single value, so every test in this
# test-set section is run on one observation; confirm this is intended.
# Bayesian sign test with the bounds -0.01 and 0.01.
bst_tda_kde_5.60.5_rf.n3_test <- BayesianSignTest(
  as.matrix(diff_tda_kde_5.60.5_rf.n3_test),
  -0.01,
  0.01
)
bst_tda_kde_5.60.5_rf.n3_test
## $probLeft
## [1] 0.5
##
## $probRope
## [1] 0.5
##
## $probRight
## [1] 0
# Odds of "left" versus "right" from the Bayesian sign test.
# The ratio is Inf whenever probRight is 0, as in the recorded output below.
bst_tda_kde_5.60.5_rf.n3_test_odds.left <-
  bst_tda_kde_5.60.5_rf.n3_test[["probLeft"]] /
  bst_tda_kde_5.60.5_rf.n3_test[["probRight"]]
bst_tda_kde_5.60.5_rf.n3_test_odds.left
## [1] Inf
# Bayesian signed-rank test on the same test-set difference.
bsr_tda_kde_5.60.5_rf.n3_test <- BayesianSignedRank(
  as.matrix(diff_tda_kde_5.60.5_rf.n3_test),
  -0.01,
  0.01
)
bsr_tda_kde_5.60.5_rf.n3_test
## $winLeft
## [1] 0.8409667
##
## $winRope
## [1] 0.1590333
##
## $winRight
## [1] 0
# Correlated Bayesian t-test on the node-3 test-set difference.
# NOTE(review): the recorded result is NA for left, rope and right; the input
# holds a single difference, which is presumably the cause. Confirm.
bct_tda_kde_5.60.5_rf.n3_test <- correlatedBayesianTtest(
  as.matrix(diff_tda_kde_5.60.5_rf.n3_test),
  0.1,
  -0.01,
  0.01
)
bct_tda_kde_5.60.5_rf.n3_test
## $left
## [1] NA
##
## $rope
## [1] NA
##
## $right
## [1] NA
# Rope Plot
#plot(rope(diff_tda_kde_5.60.5_rf.n3_test))
#BayesFactor
#bf_tda_kde_5.60.5_rf.n3_test = ttestBF(x = as.matrix(diff_tda_kde_5.60.5_rf.n3_test)) #bf_tda_kde_5.60.5_rf.n3_test
#t_test
#t.test(as.matrix(diff_tda_kde_5.60.5_rf.n3_test))
## Node 4
# Fit a random forest through caret::train() on the node-4 TDA/KDE subset.
# The response is adult_df1 (coerced to a factor); every other column is a
# predictor. Predictors are centered and scaled, and the mtry value is chosen
# from rfGrid by resampled accuracy under fitControl.
#
# NOTE(review): the recorded run warns "invalid mtry: reset to within valid
# range" and reports 108 predictors, while the tuning table lists mtry values
# up to 1000 -- rfGrid appears to exceed the predictor count; confirm and trim.
Adult_TDA_KDE_5.60.5_n4_RfFit0 <- train(
  as.factor(adult_df1) ~ .,
  data = tda.m_kde_adult_5.60.5.n4.vec,
  # randomForest's argument is lower-case `importance`; the previous
  # `Importance = T` was silently swallowed by `...`, so permutation
  # importance was never computed (importanceSD was NULL in the summary).
  importance = TRUE,
  method = "rf",
  trControl = fitControl,
  tuneGrid = rfGrid,
  preProcess = c("center", "scale"),
  metric = "Accuracy"
)
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.10th, V4.11th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool,
## V4.Prof.school, V14.Holand.Netherlands
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.10th, V4.11th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool,
## V4.Prof.school, V14.Holand.Netherlands
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.10th, V4.11th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool,
## V4.Prof.school, V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.10th, V4.11th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool,
## V4.Prof.school, V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.10th, V4.11th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool,
## V4.Prof.school, V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.10th, V4.11th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool,
## V4.Prof.school, V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.10th, V4.11th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool,
## V4.Prof.school, V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.10th, V4.11th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool,
## V4.Prof.school, V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.10th, V4.11th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool,
## V4.Prof.school, V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.10th, V4.11th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool,
## V4.Prof.school, V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.10th, V4.11th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool,
## V4.Prof.school, V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.10th, V4.11th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool,
## V4.Prof.school, V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.10th, V4.11th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool,
## V4.Prof.school, V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.10th, V4.11th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool,
## V4.Prof.school, V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.10th, V4.11th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool,
## V4.Prof.school, V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.10th, V4.11th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool,
## V4.Prof.school, V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.10th, V4.11th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool,
## V4.Prof.school, V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.10th, V4.11th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool,
## V4.Prof.school, V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.10th, V4.11th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool,
## V4.Prof.school, V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.10th, V4.11th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool,
## V4.Prof.school, V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands, V14.Hong
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands, V14.Hong
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands, V14.Hong
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands, V14.Hong
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands, V14.Hong
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands, V14.Hong
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands, V14.Hong
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands, V14.Hong
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands, V14.Hong
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands, V14.Hong
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands, V14.Hong
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands, V14.Hong
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands, V14.Hong
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands, V14.Hong
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands, V14.Hong
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands, V14.Hong
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands, V14.Hong
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands, V14.Hong
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands, V14.Hong
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands, V14.Hong
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands
# Print the fitted node-4 model: sample/predictor counts, pre-processing,
# resampling scheme and the accuracy/kappa obtained for each mtry candidate.
Adult_TDA_KDE_5.60.5_n4_RfFit0
## Random Forest
##
## 11795 samples
## 108 predictor
## 2 classes: ' <=50K', ' >50K'
##
## Pre-processing: centered (108), scaled (108)
## Resampling: Cross-Validated (3 fold)
## Summary of sample sizes: 7863, 7864, 7863
## Resampling results across tuning parameters:
##
## mtry Accuracy Kappa
## 50 0.8538363 0.5382207
## 100 0.8498513 0.5275065
## 150 0.8506991 0.5298670
## 200 0.8503600 0.5301043
## 250 0.8502752 0.5293552
## 300 0.8506992 0.5321344
## 350 0.8512078 0.5330523
## 400 0.8508689 0.5316184
## 450 0.8512079 0.5328793
## 500 0.8516318 0.5342367
## 550 0.8506991 0.5307403
## 600 0.8511231 0.5331564
## 650 0.8510383 0.5319712
## 700 0.8507840 0.5314409
## 750 0.8495970 0.5290543
## 800 0.8486644 0.5246464
## 850 0.8504449 0.5312280
## 900 0.8501906 0.5297986
## 950 0.8495969 0.5278199
## 1000 0.8508686 0.5317453
##
## Accuracy was used to select the optimal model using the largest value.
## The final value used for the model was mtry = 50.
# Per-fold accuracy and kappa of the selected node-4 model.
Adult_TDA_KDE_5.60.5_n4_RfFit0[["resample"]]
## Accuracy Kappa Resample
## 1 0.8611394 0.5534385 Fold1
## 2 0.8476602 0.5170971 Fold3
## 3 0.8527092 0.5441265 Fold2
# Keep only the Accuracy column (as a one-column data frame) for the
# fold-wise comparison further down.
# NOTE(review): rows are not sorted by fold label (Fold1, Fold3, Fold2 above),
# and the later difference is taken row by row -- confirm the baseline
# accuracies are in the same fold order.
ad_tda_kde_5.60.5_n4_rf_fit0_re <-
  Adult_TDA_KDE_5.60.5_n4_RfFit0[["resample"]]["Accuracy"]
# Component overview of the fitted object.
summary(object = Adult_TDA_KDE_5.60.5_n4_RfFit0)
## Length Class Mode
## call 5 -none- call
## type 1 -none- character
## predicted 11795 factor numeric
## err.rate 1500 -none- numeric
## confusion 6 -none- numeric
## votes 23590 matrix numeric
## oob.times 11795 -none- numeric
## classes 2 -none- character
## importance 108 -none- numeric
## importanceSD 0 -none- NULL
## localImportance 0 -none- NULL
## proximity 0 -none- NULL
## ntree 1 -none- numeric
## mtry 1 -none- numeric
## forest 14 -none- list
## y 11795 factor numeric
## test 0 -none- NULL
## inbag 0 -none- NULL
## xNames 108 -none- character
## problemType 1 -none- character
## tuneValue 1 data.frame list
## obsLevels 2 -none- character
## param 1 -none- list
# Variable-importance plot of the 25 top-ranked predictors of the node-4 fit.
# Title typo fixed ("Assited" -> "Assisted").
vip(Adult_TDA_KDE_5.60.5_n4_RfFit0, num_features = 25) +
  ggtitle("Adult_TDA_KDE_5.60.5_n4_RfFit TDA-Assisted RF")

# Score the held-out test set with the node-4 model.
pred0 <- predict(
  Adult_TDA_KDE_5.60.5_n4_RfFit0,
  newdata = adult.one_hot_df4Test
)
# Compare the predictions with the true test labels in a confusion matrix.
ad_tda_kde_5.60.5_n4_rf_cf0 <- confusionMatrix(
  data = pred0,
  reference = as.factor(adult.one_hot_df4Test$adult_df1)
)
ad_tda_kde_5.60.5_n4_rf_cf0
## Confusion Matrix and Statistics
##
## Reference
## Prediction <=50K >50K
## <=50K 6799 597
## >50K 617 1755
##
## Accuracy : 0.8757
## 95% CI : (0.869, 0.8822)
## No Information Rate : 0.7592
## P-Value [Acc > NIR] : <2e-16
##
## Kappa : 0.6611
##
## Mcnemar's Test P-Value : 0.5855
##
## Sensitivity : 0.9168
## Specificity : 0.7462
## Pos Pred Value : 0.9193
## Neg Pred Value : 0.7399
## Prevalence : 0.7592
## Detection Rate : 0.6960
## Detection Prevalence : 0.7572
## Balanced Accuracy : 0.8315
##
## 'Positive' Class : <=50K
##
# Second print of the same confusion-matrix object; the output is identical to
# the print immediately after it was created.
ad_tda_kde_5.60.5_n4_rf_cf0
## Confusion Matrix and Statistics
##
## Reference
## Prediction <=50K >50K
## <=50K 6799 597
## >50K 617 1755
##
## Accuracy : 0.8757
## 95% CI : (0.869, 0.8822)
## No Information Rate : 0.7592
## P-Value [Acc > NIR] : <2e-16
##
## Kappa : 0.6611
##
## Mcnemar's Test P-Value : 0.5855
##
## Sensitivity : 0.9168
## Specificity : 0.7462
## Pos Pred Value : 0.9193
## Neg Pred Value : 0.7399
## Prevalence : 0.7592
## Detection Rate : 0.6960
## Detection Prevalence : 0.7572
## Balanced Accuracy : 0.8315
##
## 'Positive' Class : <=50K
##
# Overall test-set statistics of the node-4 confusion matrix.
ad_tda_kde_5.60.5_n4_rf_cf0[["overall"]]
## Accuracy Kappa AccuracyLower AccuracyUpper AccuracyNull
## 8.757166e-01 6.610558e-01 8.690109e-01 8.821974e-01 7.592138e-01
## AccuracyPValue McnemarPValue
## 4.835282e-184 5.855396e-01
# Keep the (named) overall accuracy for the test-set comparison.
ad_tda_kde_5.60.5_n4_rf_cf0_ov_acc <-
  ad_tda_kde_5.60.5_n4_rf_cf0[["overall"]]["Accuracy"]
# Per-class statistics of the node-4 confusion matrix.
ad_tda_kde_5.60.5_n4_rf_cf0[["byClass"]]
## Sensitivity Specificity Pos Pred Value
## 0.9168015 0.7461735 0.9192807
## Neg Pred Value Precision Recall
## 0.7398820 0.9192807 0.9168015
## F1 Prevalence Detection Rate
## 0.9180394 0.7592138 0.6960483
## Detection Prevalence Balanced Accuracy
## 0.7571663 0.8314875
# Precision, Recall and F1 are entries 5 to 7 of byClass; select them by name.
ad_tda_kde_5.60.5_n4_rf_cf0_pre_rec_f1 <-
  ad_tda_kde_5.60.5_n4_rf_cf0[["byClass"]][c("Precision", "Recall", "F1")]
###### Initial Bayesian tests: non-TDA-assisted RF vs. TDA-assisted RF (node 4)
### Per-fold accuracy difference over the 3 folds (ad_rf_fit_re minus node 4),
### subtracted row by row.
diff_tda_kde_5.60.5_rf_n4_3_fold <-
  ad_rf_fit_re - ad_tda_kde_5.60.5_n4_rf_fit0_re
diff_tda_kde_5.60.5_rf_n4_3_fold
## Accuracy
## 1 0.002491011
## 2 0.009933880
## 3 -0.002222264
## Bayesian tests on the node-4 per-fold differences
# Bayesian sign test; the trailing -0.01 / 0.01 match the range used for the
# rope plots (presumably the ROPE bounds).
bst_tda_kde_5.60.5_rf.n4_3_fold <- BayesianSignTest(
  as.matrix(diff_tda_kde_5.60.5_rf_n4_3_fold),
  -0.01,
  0.01
)
bst_tda_kde_5.60.5_rf.n4_3_fold
## $probLeft
## [1] 0
##
## $probRope
## [1] 1
##
## $probRight
## [1] 0
# Odds of "left" versus "right" from the Bayesian sign test.
# Both probabilities are 0 in the recorded run, so the ratio is NaN (0 / 0).
bst_tda_kde_5.60.5_rf.n4_3_fold_odds.left <-
  bst_tda_kde_5.60.5_rf.n4_3_fold[["probLeft"]] /
  bst_tda_kde_5.60.5_rf.n4_3_fold[["probRight"]]
bst_tda_kde_5.60.5_rf.n4_3_fold_odds.left
## [1] NaN
# Bayesian signed-rank test on the node-4 per-fold differences, called with
# the same -0.01 / 0.01 bounds as the sign test.
bsr_tda_kde_5.60.5_rf.n4_3_fold <- BayesianSignedRank(
  as.matrix(diff_tda_kde_5.60.5_rf_n4_3_fold),
  -0.01,
  0.01
)
bsr_tda_kde_5.60.5_rf.n4_3_fold
## $winLeft
## [1] 0
##
## $winRope
## [1] 1
##
## $winRight
## [1] 0
# Correlated Bayesian t-test on the node-4 per-fold differences.
# Positional arguments after the data: 0.1 (presumably the correlation rho),
# then the -0.01 / 0.01 bounds.
bct_tda_kde_5.60.5_rf.n4_3_fold <- correlatedBayesianTtest(
  as.matrix(diff_tda_kde_5.60.5_rf_n4_3_fold),
  0.1,
  -0.01,
  0.01
)
bct_tda_kde_5.60.5_rf.n4_3_fold
## $left
## [1] 0.04086506
##
## $rope
## [1] 0.8353051
##
## $right
## [1] 0.1238298
# ROPE plot for the node-4 per-fold differences, using the range [-0.01, 0.01].
plot(
  rope(diff_tda_kde_5.60.5_rf_n4_3_fold, range = c(-0.01, 0.01))
)

# The 3-fold Bayes-factor / t-test step is kept with the other 3-fold analyses,
# ahead of the test-set section, so that the "Odds Left" heading further down
# sits directly above the statement it describes.
#BayesFactor
#bf_tda_kde_5.60.5_rf.n4_3_fold = ttestBF(x = as.matrix(diff_tda_kde_5.60.5_rf_n4_3_fold))
#bf_tda_kde_5.60.5_rf.n4_3_fold
#t_test
# One-sample t-test of the node-4 per-fold differences (null: mean equal to 0).
t.test(as.matrix(diff_tda_kde_5.60.5_rf_n4_3_fold))
##
## One Sample t-test
##
## data: as.matrix(diff_tda_kde_5.60.5_rf_n4_3_fold)
## t = 0.9611, df = 2, p-value = 0.4379
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
## -0.01182424 0.01862600
## sample estimates:
## mean of x
## 0.003400876
### Test set diff
# rf_cf_ov_acc (presumably the non-TDA RF test accuracy) minus the node-4
# TDA-assisted RF test accuracy.
diff_tda_kde_5.60.5_rf.n4_test <-
  rf_cf_ov_acc - ad_tda_kde_5.60.5_n4_rf_cf0_ov_acc
diff_tda_kde_5.60.5_rf.n4_test
## Accuracy
## -0.01678952
## Bayesian Tests Test set diff
# Bayesian Sign Test
bst_tda_kde_5.60.5_rf.n4_test <- BayesianSignTest(
  as.matrix(diff_tda_kde_5.60.5_rf.n4_test),
  -0.01,
  0.01
)
bst_tda_kde_5.60.5_rf.n4_test
## $probLeft
## [1] 0.5
##
## $probRope
## [1] 0.5
##
## $probRight
## [1] 0
# Odds Left Bayesian Sign Test
# The ratio is Inf whenever probRight is 0 (as in the recorded output).
bst_tda_kde_5.60.5_rf.n4_test_odds.left <-
  bst_tda_kde_5.60.5_rf.n4_test$probLeft /
  bst_tda_kde_5.60.5_rf.n4_test$probRight
bst_tda_kde_5.60.5_rf.n4_test_odds.left
## [1] Inf
# Bayesian signed-rank test on the node-4 test-set difference, called with the
# same -0.01 / 0.01 bounds as the sign test.
bsr_tda_kde_5.60.5_rf.n4_test <- BayesianSignedRank(
  as.matrix(diff_tda_kde_5.60.5_rf.n4_test),
  -0.01,
  0.01
)
bsr_tda_kde_5.60.5_rf.n4_test
## $winLeft
## [1] 0.5425
##
## $winRope
## [1] 0.4575
##
## $winRight
## [1] 0
# Correlated Bayesian t-test on the node-4 test-set difference.
# The input is a single value here, and the recorded result is NA for
# left / rope / right.
bct_tda_kde_5.60.5_rf.n4_test <- correlatedBayesianTtest(
  as.matrix(diff_tda_kde_5.60.5_rf.n4_test),
  0.1,
  -0.01,
  0.01
)
bct_tda_kde_5.60.5_rf.n4_test
## $left
## [1] NA
##
## $rope
## [1] NA
##
## $right
## [1] NA
# Rope Plot
#plot(rope(diff_tda_kde_5.60.5_rf.n4_test))
#BayesFactor
#bf_tda_kde_5.60.5_rf.n4_test = ttestBF(x = as.matrix(diff_tda_kde_5.60.5_rf.n4_test))
#bf_tda_kde_5.60.5_rf.n4_test
#t_test
#t.test(as.matrix(diff_tda_kde_5.60.5_rf.n4_test))
##Node5
# Random forest fitted through caret::train on the n5 TDA/KDE data set, with
# centring and scaling as pre-processing and accuracy as the selection metric.
#
# `importance = TRUE` must be spelled in lower case: randomForest() has no
# `Importance` argument, so the earlier `Importance = T` was swallowed by `...`
# and ignored (the fitted model reported importanceSD as NULL).
# `preProcess` is written out in full instead of relying on partial matching
# of `preProc`.
#
# NOTE(review): the recorded run warns "invalid mtry: reset to within valid
# range" while the fit reports 108 predictors; rfGrid (defined elsewhere)
# apparently contains mtry values above that -- consider capping the grid.
Adult_TDA_KDE_5.60.5_n5_RfFit0 <- train(
  as.factor(adult_df1) ~ .,
  data = tda.m_kde_adult_5.60.5.n5.vec,
  importance = TRUE,
  method = "rf",
  trControl = fitControl,
  tuneGrid = rfGrid,
  preProcess = c("center", "scale"),
  metric = "Accuracy"
)
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Without.pay, V4.10th, V4.11th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Assoc.acdm, V4.Bachelors,
## V4.Doctorate, V4.Masters, V4.Preschool, V4.Prof.school, V14.Holand.Netherlands
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Without.pay, V4.10th, V4.11th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Assoc.acdm, V4.Bachelors,
## V4.Doctorate, V4.Masters, V4.Preschool, V4.Prof.school, V14.Holand.Netherlands
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Without.pay, V4.10th, V4.11th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Assoc.acdm, V4.Bachelors,
## V4.Doctorate, V4.Masters, V4.Preschool, V4.Prof.school, V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Without.pay, V4.10th, V4.11th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Assoc.acdm, V4.Bachelors,
## V4.Doctorate, V4.Masters, V4.Preschool, V4.Prof.school, V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Without.pay, V4.10th, V4.11th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Assoc.acdm, V4.Bachelors,
## V4.Doctorate, V4.Masters, V4.Preschool, V4.Prof.school, V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Without.pay, V4.10th, V4.11th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Assoc.acdm, V4.Bachelors,
## V4.Doctorate, V4.Masters, V4.Preschool, V4.Prof.school, V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Without.pay, V4.10th, V4.11th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Assoc.acdm, V4.Bachelors,
## V4.Doctorate, V4.Masters, V4.Preschool, V4.Prof.school, V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Without.pay, V4.10th, V4.11th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Assoc.acdm, V4.Bachelors,
## V4.Doctorate, V4.Masters, V4.Preschool, V4.Prof.school, V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Without.pay, V4.10th, V4.11th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Assoc.acdm, V4.Bachelors,
## V4.Doctorate, V4.Masters, V4.Preschool, V4.Prof.school, V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Without.pay, V4.10th, V4.11th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Assoc.acdm, V4.Bachelors,
## V4.Doctorate, V4.Masters, V4.Preschool, V4.Prof.school, V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Without.pay, V4.10th, V4.11th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Assoc.acdm, V4.Bachelors,
## V4.Doctorate, V4.Masters, V4.Preschool, V4.Prof.school, V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Without.pay, V4.10th, V4.11th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Assoc.acdm, V4.Bachelors,
## V4.Doctorate, V4.Masters, V4.Preschool, V4.Prof.school, V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Without.pay, V4.10th, V4.11th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Assoc.acdm, V4.Bachelors,
## V4.Doctorate, V4.Masters, V4.Preschool, V4.Prof.school, V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Without.pay, V4.10th, V4.11th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Assoc.acdm, V4.Bachelors,
## V4.Doctorate, V4.Masters, V4.Preschool, V4.Prof.school, V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Without.pay, V4.10th, V4.11th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Assoc.acdm, V4.Bachelors,
## V4.Doctorate, V4.Masters, V4.Preschool, V4.Prof.school, V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Without.pay, V4.10th, V4.11th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Assoc.acdm, V4.Bachelors,
## V4.Doctorate, V4.Masters, V4.Preschool, V4.Prof.school, V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Without.pay, V4.10th, V4.11th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Assoc.acdm, V4.Bachelors,
## V4.Doctorate, V4.Masters, V4.Preschool, V4.Prof.school, V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Without.pay, V4.10th, V4.11th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Assoc.acdm, V4.Bachelors,
## V4.Doctorate, V4.Masters, V4.Preschool, V4.Prof.school, V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Without.pay, V4.10th, V4.11th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Assoc.acdm, V4.Bachelors,
## V4.Doctorate, V4.Masters, V4.Preschool, V4.Prof.school, V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Without.pay, V4.10th, V4.11th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Assoc.acdm, V4.Bachelors,
## V4.Doctorate, V4.Masters, V4.Preschool, V4.Prof.school, V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Assoc.acdm, V4.Bachelors, V4.Doctorate,
## V4.Masters, V4.Preschool, V4.Prof.school, V14.Holand.Netherlands
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Assoc.acdm, V4.Bachelors, V4.Doctorate,
## V4.Masters, V4.Preschool, V4.Prof.school, V14.Holand.Netherlands
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Assoc.acdm, V4.Bachelors, V4.Doctorate,
## V4.Masters, V4.Preschool, V4.Prof.school, V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Assoc.acdm, V4.Bachelors, V4.Doctorate,
## V4.Masters, V4.Preschool, V4.Prof.school, V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Assoc.acdm, V4.Bachelors, V4.Doctorate,
## V4.Masters, V4.Preschool, V4.Prof.school, V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Assoc.acdm, V4.Bachelors, V4.Doctorate,
## V4.Masters, V4.Preschool, V4.Prof.school, V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Assoc.acdm, V4.Bachelors, V4.Doctorate,
## V4.Masters, V4.Preschool, V4.Prof.school, V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Assoc.acdm, V4.Bachelors, V4.Doctorate,
## V4.Masters, V4.Preschool, V4.Prof.school, V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Assoc.acdm, V4.Bachelors, V4.Doctorate,
## V4.Masters, V4.Preschool, V4.Prof.school, V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Assoc.acdm, V4.Bachelors, V4.Doctorate,
## V4.Masters, V4.Preschool, V4.Prof.school, V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Assoc.acdm, V4.Bachelors, V4.Doctorate,
## V4.Masters, V4.Preschool, V4.Prof.school, V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Assoc.acdm, V4.Bachelors, V4.Doctorate,
## V4.Masters, V4.Preschool, V4.Prof.school, V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Assoc.acdm, V4.Bachelors, V4.Doctorate,
## V4.Masters, V4.Preschool, V4.Prof.school, V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Assoc.acdm, V4.Bachelors, V4.Doctorate,
## V4.Masters, V4.Preschool, V4.Prof.school, V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Assoc.acdm, V4.Bachelors, V4.Doctorate,
## V4.Masters, V4.Preschool, V4.Prof.school, V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Assoc.acdm, V4.Bachelors, V4.Doctorate,
## V4.Masters, V4.Preschool, V4.Prof.school, V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Assoc.acdm, V4.Bachelors, V4.Doctorate,
## V4.Masters, V4.Preschool, V4.Prof.school, V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Assoc.acdm, V4.Bachelors, V4.Doctorate,
## V4.Masters, V4.Preschool, V4.Prof.school, V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Assoc.acdm, V4.Bachelors, V4.Doctorate,
## V4.Masters, V4.Preschool, V4.Prof.school, V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Assoc.acdm, V4.Bachelors, V4.Doctorate,
## V4.Masters, V4.Preschool, V4.Prof.school, V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.10th, V4.11th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Assoc.acdm, V4.Bachelors,
## V4.Doctorate, V4.Masters, V4.Preschool, V4.Prof.school, V14.Holand.Netherlands
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.10th, V4.11th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Assoc.acdm, V4.Bachelors,
## V4.Doctorate, V4.Masters, V4.Preschool, V4.Prof.school, V14.Holand.Netherlands
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.10th, V4.11th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Assoc.acdm, V4.Bachelors,
## V4.Doctorate, V4.Masters, V4.Preschool, V4.Prof.school, V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.10th, V4.11th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Assoc.acdm, V4.Bachelors,
## V4.Doctorate, V4.Masters, V4.Preschool, V4.Prof.school, V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.10th, V4.11th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Assoc.acdm, V4.Bachelors,
## V4.Doctorate, V4.Masters, V4.Preschool, V4.Prof.school, V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.10th, V4.11th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Assoc.acdm, V4.Bachelors,
## V4.Doctorate, V4.Masters, V4.Preschool, V4.Prof.school, V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.10th, V4.11th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Assoc.acdm, V4.Bachelors,
## V4.Doctorate, V4.Masters, V4.Preschool, V4.Prof.school, V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.10th, V4.11th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Assoc.acdm, V4.Bachelors,
## V4.Doctorate, V4.Masters, V4.Preschool, V4.Prof.school, V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.10th, V4.11th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Assoc.acdm, V4.Bachelors,
## V4.Doctorate, V4.Masters, V4.Preschool, V4.Prof.school, V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.10th, V4.11th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Assoc.acdm, V4.Bachelors,
## V4.Doctorate, V4.Masters, V4.Preschool, V4.Prof.school, V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.10th, V4.11th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Assoc.acdm, V4.Bachelors,
## V4.Doctorate, V4.Masters, V4.Preschool, V4.Prof.school, V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.10th, V4.11th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Assoc.acdm, V4.Bachelors,
## V4.Doctorate, V4.Masters, V4.Preschool, V4.Prof.school, V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.10th, V4.11th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Assoc.acdm, V4.Bachelors,
## V4.Doctorate, V4.Masters, V4.Preschool, V4.Prof.school, V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.10th, V4.11th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Assoc.acdm, V4.Bachelors,
## V4.Doctorate, V4.Masters, V4.Preschool, V4.Prof.school, V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.10th, V4.11th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Assoc.acdm, V4.Bachelors,
## V4.Doctorate, V4.Masters, V4.Preschool, V4.Prof.school, V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.10th, V4.11th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Assoc.acdm, V4.Bachelors,
## V4.Doctorate, V4.Masters, V4.Preschool, V4.Prof.school, V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.10th, V4.11th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Assoc.acdm, V4.Bachelors,
## V4.Doctorate, V4.Masters, V4.Preschool, V4.Prof.school, V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.10th, V4.11th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Assoc.acdm, V4.Bachelors,
## V4.Doctorate, V4.Masters, V4.Preschool, V4.Prof.school, V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.10th, V4.11th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Assoc.acdm, V4.Bachelors,
## V4.Doctorate, V4.Masters, V4.Preschool, V4.Prof.school, V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.10th, V4.11th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Assoc.acdm, V4.Bachelors,
## V4.Doctorate, V4.Masters, V4.Preschool, V4.Prof.school, V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Assoc.acdm, V4.Bachelors, V4.Doctorate,
## V4.Masters, V4.Preschool, V4.Prof.school, V14.Holand.Netherlands
# Print the fitted caret model: resampling results for every mtry value tried
# and the mtry value selected for the final model.
Adult_TDA_KDE_5.60.5_n5_RfFit0
## Random Forest
##
## 8940 samples
## 108 predictor
## 2 classes: ' <=50K', ' >50K'
##
## Pre-processing: centered (108), scaled (108)
## Resampling: Cross-Validated (3 fold)
## Summary of sample sizes: 5960, 5960, 5960
## Resampling results across tuning parameters:
##
## mtry Accuracy Kappa
## 50 0.8734899 0.4335920
## 100 0.8694631 0.4247921
## 150 0.8682327 0.4222203
## 200 0.8690157 0.4261507
## 250 0.8674497 0.4170683
## 300 0.8684564 0.4220421
## 350 0.8700224 0.4300513
## 400 0.8681208 0.4196209
## 450 0.8680089 0.4192217
## 500 0.8680089 0.4199632
## 550 0.8681208 0.4226921
## 600 0.8685682 0.4242608
## 650 0.8680089 0.4187985
## 700 0.8685682 0.4239588
## 750 0.8673378 0.4198456
## 800 0.8674497 0.4181890
## 850 0.8680089 0.4232862
## 900 0.8682327 0.4218440
## 950 0.8671141 0.4182009
## 1000 0.8683445 0.4216936
##
## Accuracy was used to select the optimal model using the largest value.
## The final value used for the model was mtry = 50.
# Per-fold accuracy and kappa of the selected model
Adult_TDA_KDE_5.60.5_n5_RfFit0[["resample"]]
## Accuracy Kappa Resample
## 1 0.8684564 0.4186523 Fold1
## 2 0.8704698 0.4228168 Fold3
## 3 0.8815436 0.4593069 Fold2
# Keep only the per-fold Accuracy column (as a one-column data frame).
# NOTE(review): the resample rows are listed as Fold1, Fold3, Fold2 in the
# recorded output, i.e. not in fold order. The later row-wise subtraction
# against the baseline accuracies pairs rows by position -- verify that both
# sides use the same fold ordering.
ad_tda_kde_5.60.5_n5_rf_fit0_re <-
  Adult_TDA_KDE_5.60.5_n5_RfFit0$resample["Accuracy"]
# Structure summary of the fitted model object
summary(Adult_TDA_KDE_5.60.5_n5_RfFit0)
## Length Class Mode
## call 5 -none- call
## type 1 -none- character
## predicted 8940 factor numeric
## err.rate 1500 -none- numeric
## confusion 6 -none- numeric
## votes 17880 matrix numeric
## oob.times 8940 -none- numeric
## classes 2 -none- character
## importance 108 -none- numeric
## importanceSD 0 -none- NULL
## localImportance 0 -none- NULL
## proximity 0 -none- NULL
## ntree 1 -none- numeric
## mtry 1 -none- numeric
## forest 14 -none- list
## y 8940 factor numeric
## test 0 -none- NULL
## inbag 0 -none- NULL
## xNames 108 -none- character
## problemType 1 -none- character
## tuneValue 1 data.frame list
## obsLevels 2 -none- character
## param 1 -none- list
# Variable-importance plot limited to 25 features.
# The plot title previously read "TDA-Assited"; the typo is corrected here.
vip(Adult_TDA_KDE_5.60.5_n5_RfFit0, num_features = 25) +
  ggtitle("Adult_TDA_KDE_5.60.5_n5_RfFit TDA-Assisted RF")

# Score the held-out test data with the model trained above
pred0 <- predict(
  Adult_TDA_KDE_5.60.5_n5_RfFit0,
  newdata = adult.one_hot_df4Test
)
# Confusion matrix of the predictions against the test labels
ad_tda_kde_5.60.5_n5_rf_cf0 <- confusionMatrix(
  data = pred0,
  reference = as.factor(adult.one_hot_df4Test$adult_df1)
)
ad_tda_kde_5.60.5_n5_rf_cf0
## Confusion Matrix and Statistics
##
## Reference
## Prediction <=50K >50K
## <=50K 6998 860
## >50K 418 1492
##
## Accuracy : 0.8692
## 95% CI : (0.8623, 0.8758)
## No Information Rate : 0.7592
## P-Value [Acc > NIR] : < 2.2e-16
##
## Kappa : 0.6176
##
## Mcnemar's Test P-Value : < 2.2e-16
##
## Sensitivity : 0.9436
## Specificity : 0.6344
## Pos Pred Value : 0.8906
## Neg Pred Value : 0.7812
## Prevalence : 0.7592
## Detection Rate : 0.7164
## Detection Prevalence : 0.8045
## Balanced Accuracy : 0.7890
##
## 'Positive' Class : <=50K
##
# Prints the same confusion-matrix object a second time; the output is
# identical to the print immediately before it.
ad_tda_kde_5.60.5_n5_rf_cf0
## Confusion Matrix and Statistics
##
## Reference
## Prediction <=50K >50K
## <=50K 6998 860
## >50K 418 1492
##
## Accuracy : 0.8692
## 95% CI : (0.8623, 0.8758)
## No Information Rate : 0.7592
## P-Value [Acc > NIR] : < 2.2e-16
##
## Kappa : 0.6176
##
## Mcnemar's Test P-Value : < 2.2e-16
##
## Sensitivity : 0.9436
## Specificity : 0.6344
## Pos Pred Value : 0.8906
## Neg Pred Value : 0.7812
## Prevalence : 0.7592
## Detection Rate : 0.7164
## Detection Prevalence : 0.8045
## Balanced Accuracy : 0.7890
##
## 'Positive' Class : <=50K
##
# Overall statistics (accuracy, kappa, accuracy bounds, p-values)
ad_tda_kde_5.60.5_n5_rf_cf0[["overall"]]
## Accuracy Kappa AccuracyLower AccuracyUpper AccuracyNull
## 8.691646e-01 6.176167e-01 8.623158e-01 8.757924e-01 7.592138e-01
## AccuracyPValue McnemarPValue
## 1.668548e-162 5.798927e-35
# Overall test-set accuracy (first entry of $overall), kept as a named value
ad_tda_kde_5.60.5_n5_rf_cf0_ov_acc <-
  ad_tda_kde_5.60.5_n5_rf_cf0$overall["Accuracy"]
# Per-class statistics
ad_tda_kde_5.60.5_n5_rf_cf0[["byClass"]]
## Sensitivity Specificity Pos Pred Value
## 0.9436354 0.6343537 0.8905574
## Neg Pred Value Precision Recall
## 0.7811518 0.8905574 0.9436354
## F1 Prevalence Detection Rate
## 0.9163284 0.7592138 0.7164210
## Detection Prevalence Balanced Accuracy
## 0.8044636 0.7889946
# Precision, recall and F1 (entries 5 to 7 of $byClass)
ad_tda_kde_5.60.5_n5_rf_cf0_pre_rec_f1 <-
  ad_tda_kde_5.60.5_n5_rf_cf0$byClass[c("Precision", "Recall", "F1")]
###### Conduct initial Bayesian tests of non-tda-assisted RF vs. tda-assisted RF classifiers
### 3-fold diff
# Row-wise difference of per-fold accuracies: ad_rf_fit_re minus the n5
# TDA/KDE random-forest accuracies.
diff_tda_kde_5.60.5_rf_n5_3_fold <-
  ad_rf_fit_re - ad_tda_kde_5.60.5_n5_rf_fit0_re
diff_tda_kde_5.60.5_rf_n5_3_fold
## Accuracy
## 1 -0.004825995
## 2 -0.012875695
## 3 -0.031056654
## Bayesian Tests 3-fold diff
# Bayesian Sign Test
# Run on the three per-fold differences with the bounds -0.01 and 0.01.
bst_tda_kde_5.60.5_rf.n5_3_fold <- BayesianSignTest(
  as.matrix(diff_tda_kde_5.60.5_rf_n5_3_fold),
  -0.01,
  0.01
)
bst_tda_kde_5.60.5_rf.n5_3_fold
## $probLeft
## [1] 0.5
##
## $probRope
## [1] 0.5
##
## $probRight
## [1] 0
# Odds Left Bayesian Sign Test
# Ratio probLeft / probRight; probRight is 0 in the recorded run, giving Inf.
bst_tda_kde_5.60.5_rf.n5_3_fold_odds.left <-
  bst_tda_kde_5.60.5_rf.n5_3_fold[["probLeft"]] /
  bst_tda_kde_5.60.5_rf.n5_3_fold[["probRight"]]
bst_tda_kde_5.60.5_rf.n5_3_fold_odds.left
## [1] Inf
# Bayesian Signed Rank Test
# Run on the three per-fold differences with the bounds -0.01 and 0.01.
bsr_tda_kde_5.60.5_rf.n5_3_fold <- BayesianSignedRank(
  as.matrix(diff_tda_kde_5.60.5_rf_n5_3_fold),
  -0.01,
  0.01
)
bsr_tda_kde_5.60.5_rf.n5_3_fold
## $winLeft
## [1] 0.6046667
##
## $winRope
## [1] 0.3953333
##
## $winRight
## [1] 0
# Bayesian Correlated Test
# Positional arguments after the data: 0.1, then the bounds -0.01 and 0.01
# (presumably the correlation and the ROPE limits -- confirm against the
# definition of correlatedBayesianTtest).
bct_tda_kde_5.60.5_rf.n5_3_fold <- correlatedBayesianTtest(
  as.matrix(diff_tda_kde_5.60.5_rf_n5_3_fold),
  0.1,
  -0.01,
  0.01
)
bct_tda_kde_5.60.5_rf.n5_3_fold
## $left
## [1] 0.7212907
##
## $rope
## [1] 0.2290173
##
## $right
## [1] 0.04969198
# ROPE plot of the 3-fold differences over the interval [-0.01, 0.01]
plot(
  rope(diff_tda_kde_5.60.5_rf_n5_3_fold, c(-0.01, 0.01))
)

# BayesFactor (disabled)
#bf_tda_kde_5.60.5_rf.n5_3_fold = ttestBF(x = as.matrix(diff_tda_kde_5.60.5_rf_n5_3_fold))
#bf_tda_kde_5.60.5_rf.n5_3_fold
# One-sample t-test on the three per-fold differences
t.test(
  as.matrix(diff_tda_kde_5.60.5_rf_n5_3_fold)
)
##
## One Sample t-test
##
## data: as.matrix(diff_tda_kde_5.60.5_rf_n5_3_fold)
## t = -2.0949, df = 2, p-value = 0.1712
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
## -0.04963329 0.01712773
## sample estimates:
## mean of x
## -0.01625278
### Test set diff
# Difference between the two overall test-set accuracies:
# rf_cf_ov_acc minus the n5 TDA/KDE random-forest accuracy (a single value).
diff_tda_kde_5.60.5_rf.n5_test <-
  rf_cf_ov_acc - ad_tda_kde_5.60.5_n5_rf_cf0_ov_acc
diff_tda_kde_5.60.5_rf.n5_test
## Accuracy
## -0.01023751
## Bayesian Tests Test set diff
# Bayesian Sign Test
# The input here is the single test-set difference, passed as a 1x1 matrix,
# together with the bounds -0.01 and 0.01.
bst_tda_kde_5.60.5_rf.n5_test <- BayesianSignTest(
  as.matrix(diff_tda_kde_5.60.5_rf.n5_test),
  -0.01,
  0.01
)
bst_tda_kde_5.60.5_rf.n5_test
## $probLeft
## [1] 0.5
##
## $probRope
## [1] 0.5
##
## $probRight
## [1] 0
# Odds Left Bayesian Sign Test
# Ratio probLeft / probRight; probRight is 0 in the recorded run, giving Inf.
bst_tda_kde_5.60.5_rf.n5_test_odds.left <-
  bst_tda_kde_5.60.5_rf.n5_test[["probLeft"]] /
  bst_tda_kde_5.60.5_rf.n5_test[["probRight"]]
bst_tda_kde_5.60.5_rf.n5_test_odds.left
## [1] Inf
# Bayesian Signed Rank Test
# Applied to the single test-set difference with the bounds -0.01 and 0.01.
bsr_tda_kde_5.60.5_rf.n5_test <- BayesianSignedRank(
  as.matrix(diff_tda_kde_5.60.5_rf.n5_test),
  -0.01,
  0.01
)
bsr_tda_kde_5.60.5_rf.n5_test
## $winLeft
## [1] 0.5425333
##
## $winRope
## [1] 0.4574667
##
## $winRight
## [1] 0
# Bayesian Correlated Test
# NOTE(review): the recorded result is NA for left, rope and right. The input
# is a single difference, so presumably the variance is undefined for one
# observation -- confirm against the definition of correlatedBayesianTtest.
bct_tda_kde_5.60.5_rf.n5_test <- correlatedBayesianTtest(
  as.matrix(diff_tda_kde_5.60.5_rf.n5_test),
  0.1,
  -0.01,
  0.01
)
bct_tda_kde_5.60.5_rf.n5_test
## $left
## [1] NA
##
## $rope
## [1] NA
##
## $right
## [1] NA
# Rope Plot
#plot(rope(diff_tda_kde_5.60.5_rf.n5_test))
#BayesFactor
#bf_tda_kde_5.60.5_rf.n5_test = ttestBF(x = as.matrix(diff_tda_kde_5.60.5_rf.n5_test))
#bf_tda_kde_5.60.5_rf.n5_test
#t_test
#t.test(as.matrix(diff_tda_kde_5.60.5_rf.n5_test))
## Non-TDA-Assisted
# Tuning grid shared by the SVM fits: every combination of three sigma values
# and five C values (0.25, 0.50, ..., 1.25).
svmGrid <- expand.grid(
  sigma = c(0.1, 1, 10),
  C = 0.25 * (1:5)
)
# Support Vector Machine - Radial Basis (non-TDA-assisted baseline)
# Fits caret's "svmRadial" model for as.factor(adult_df1) on all other columns
# of adult.one_hot_df4Train, tuning over svmGrid with the resampling defined in
# fitControl and selecting by Accuracy. Predictors are centered and scaled.
# `TRUE` replaces `T`, which is an ordinary variable that can be reassigned.
# NOTE(review): the run log reports a zero-variance predictor
# (V14.Holand.Netherlands) during center/scale; consider adding "zv" to
# preProc if those warnings should be eliminated.
adultSvmFit <- train(
  as.factor(adult_df1) ~ .,
  data = adult.one_hot_df4Train,
  Importance = TRUE,
  method = "svmRadial",
  trControl = fitControl,
  tuneGrid = svmGrid,
  preProc = c("center", "scale"),
  metric = "Accuracy"
)
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
# Print the fitted baseline SVM (resampling results per sigma/C and the chosen values)
adultSvmFit
## Support Vector Machines with Radial Basis Function Kernel
##
## 22793 samples
## 108 predictor
## 2 classes: ' <=50K', ' >50K'
##
## Pre-processing: centered (108), scaled (108)
## Resampling: Cross-Validated (3 fold)
## Summary of sample sizes: 15196, 15195, 15195
## Resampling results across tuning parameters:
##
## sigma C Accuracy Kappa
## 0.1 0.25 0.8071777 0.3474412847
## 0.1 0.50 0.8161718 0.4016471422
## 0.1 0.75 0.8205591 0.4288367993
## 0.1 1.00 0.8231914 0.4462417109
## 0.1 1.25 0.8233669 0.4527020877
## 1.0 0.25 0.7780021 0.1405706744
## 1.0 0.50 0.7882245 0.2200226573
## 1.0 0.75 0.7926118 0.2614948044
## 1.0 1.00 0.7961655 0.2925500442
## 1.0 1.25 0.7971746 0.3092072051
## 10.0 0.25 0.7590050 -0.0003505724
## 10.0 0.50 0.7604528 0.0157859748
## 10.0 0.75 0.7632606 0.0455007542
## 10.0 1.00 0.7655859 0.0761557182
## 10.0 1.25 0.7660685 0.0942110410
##
## Accuracy was used to select the optimal model using the largest value.
## The final values used for the model were sigma = 0.1 and C = 1.25.
# Per-fold Accuracy and Kappa stored on the fit
adultSvmFit$resample
## Accuracy Kappa Resample
## 1 0.8242727 0.4594833 Fold1
## 2 0.8223217 0.4519671 Fold3
## 3 0.8235062 0.4466559 Fold2
# Keep the per-fold Accuracy column (first column of the resample table) as a
# one-column data frame for the fold-wise comparison later on.
ad_svm_fit_re <- adultSvmFit$resample["Accuracy"]
summary(adultSvmFit)
## Length Class Mode
## 1 ksvm S4
#vip(adultSvmFit, 25) + ggtitle("non-TDA-Assited Svm")
# Score the held-out test data with the baseline SVM
predictions <- predict(adultSvmFit, newdata = adult.one_hot_df4Test)
# Confusion matrix: predicted labels against the observed test labels
svm_cf <- confusionMatrix(
  data = predictions,
  reference = as.factor(adult.one_hot_df4Test$adult_df1)
)
svm_cf
## Confusion Matrix and Statistics
##
## Reference
## Prediction <=50K >50K
## <=50K 6941 1215
## >50K 475 1137
##
## Accuracy : 0.827
## 95% CI : (0.8193, 0.8344)
## No Information Rate : 0.7592
## P-Value [Acc > NIR] : < 2.2e-16
##
## Kappa : 0.4698
##
## Mcnemar's Test P-Value : < 2.2e-16
##
## Sensitivity : 0.9359
## Specificity : 0.4834
## Pos Pred Value : 0.8510
## Neg Pred Value : 0.7053
## Prevalence : 0.7592
## Detection Rate : 0.7106
## Detection Prevalence : 0.8350
## Balanced Accuracy : 0.7097
##
## 'Positive' Class : <=50K
##
# Overall statistics of the baseline confusion matrix (Accuracy, Kappa, ...)
svm_cf$overall
## Accuracy Kappa AccuracyLower AccuracyUpper AccuracyNull
## 8.269861e-01 4.698380e-01 8.193364e-01 8.344402e-01 7.592138e-01
## AccuracyPValue McnemarPValue
## 1.336799e-59 2.986040e-72
# Store the overall test accuracy (first entry of $overall), then show the
# per-class metrics.
svm_cf_ov_acc <- svm_cf$overall["Accuracy"]
svm_cf$byClass
## Sensitivity Specificity Pos Pred Value
## 0.9359493 0.4834184 0.8510299
## Neg Pred Value Precision Recall
## 0.7053350 0.8510299 0.9359493
## F1 Prevalence Detection Rate
## 0.8914719 0.7592138 0.7105856
## Detection Prevalence Balanced Accuracy
## 0.8349713 0.7096838
# Precision, Recall and F1 (entries 5 to 7 of $byClass)
svm_cf_pr_rec_f1 <- svm_cf$byClass[c("Precision", "Recall", "F1")]
##With TDA PCA filter 5 intervals, 50% overlap, 5 bins
##Node1
# Radial SVM with the same grid, resampling, preprocessing and metric as the
# baseline fit, trained on a TDA-derived subset of the data.
# `TRUE` replaces `T`, which is an ordinary variable that can be reassigned.
# NOTE(review): this is labelled Node1 but trains on
# tda.m_adult_5.60.5.n2.vec, the same object the Node2 fit uses -- looks like a
# copy-paste slip; confirm which data object Node1 should use.
# NOTE(review): the heading says 50% overlap while object names say "5.60.5" --
# confirm the overlap setting.
Adult_TDA_PC_5.60.5_n1_SvmFit0 <- train(
  as.factor(adult_df1) ~ .,
  data = tda.m_adult_5.60.5.n2.vec,
  Importance = TRUE,
  method = "svmRadial",
  trControl = fitControl,
  tuneGrid = svmGrid,
  preProc = c("center", "scale"),
  metric = "Accuracy"
)
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands, V14.Honduras, V14.Trinadad.Tobago
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands, V14.Honduras, V14.Trinadad.Tobago
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands, V14.Honduras, V14.Trinadad.Tobago
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands, V14.Honduras, V14.Trinadad.Tobago
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands, V14.Honduras, V14.Trinadad.Tobago
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands, V14.Honduras, V14.Trinadad.Tobago
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands, V14.Honduras, V14.Trinadad.Tobago
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands, V14.Honduras, V14.Trinadad.Tobago
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands, V14.Honduras, V14.Trinadad.Tobago
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands, V14.Honduras, V14.Trinadad.Tobago
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands, V14.Honduras, V14.Trinadad.Tobago
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands, V14.Honduras, V14.Trinadad.Tobago
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands, V14.Honduras, V14.Trinadad.Tobago
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands, V14.Honduras, V14.Trinadad.Tobago
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands, V14.Honduras, V14.Trinadad.Tobago
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands, V14.Outlying.US.Guam.USVI.etc.
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands, V14.Outlying.US.Guam.USVI.etc.
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands, V14.Outlying.US.Guam.USVI.etc.
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands, V14.Outlying.US.Guam.USVI.etc.
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands, V14.Outlying.US.Guam.USVI.etc.
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands, V14.Outlying.US.Guam.USVI.etc.
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands, V14.Outlying.US.Guam.USVI.etc.
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands, V14.Outlying.US.Guam.USVI.etc.
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands, V14.Outlying.US.Guam.USVI.etc.
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands, V14.Outlying.US.Guam.USVI.etc.
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands, V14.Outlying.US.Guam.USVI.etc.
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands, V14.Outlying.US.Guam.USVI.etc.
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands, V14.Outlying.US.Guam.USVI.etc.
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands, V14.Outlying.US.Guam.USVI.etc.
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands, V14.Outlying.US.Guam.USVI.etc.
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V7.Priv.house.serv,
## V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V7.Priv.house.serv,
## V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V7.Priv.house.serv,
## V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V7.Priv.house.serv,
## V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V7.Priv.house.serv,
## V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V7.Priv.house.serv,
## V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V7.Priv.house.serv,
## V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V7.Priv.house.serv,
## V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V7.Priv.house.serv,
## V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V7.Priv.house.serv,
## V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V7.Priv.house.serv,
## V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V7.Priv.house.serv,
## V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V7.Priv.house.serv,
## V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V7.Priv.house.serv,
## V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V7.Priv.house.serv,
## V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
# Print the fitted Node1 SVM (resampling results per sigma/C and the chosen values)
Adult_TDA_PC_5.60.5_n1_SvmFit0
## Support Vector Machines with Radial Basis Function Kernel
##
## 13933 samples
## 108 predictor
## 2 classes: ' <=50K', ' >50K'
##
## Pre-processing: centered (108), scaled (108)
## Resampling: Cross-Validated (3 fold)
## Summary of sample sizes: 9288, 9289, 9289
## Resampling results across tuning parameters:
##
## sigma C Accuracy Kappa
## 0.1 0.25 0.6877912 0.37316378
## 0.1 0.50 0.7017151 0.40186511
## 0.1 0.75 0.7032941 0.40552941
## 0.1 1.00 0.7054471 0.41015184
## 0.1 1.25 0.7061649 0.41173707
## 1.0 0.25 0.6186746 0.22944832
## 1.0 0.50 0.6460194 0.28662967
## 1.0 0.75 0.6576468 0.31122720
## 1.0 1.00 0.6642500 0.32544440
## 1.0 1.25 0.6653267 0.32791605
## 10.0 0.25 0.5252995 0.03411200
## 10.0 0.50 0.5498455 0.08575684
## 10.0 0.75 0.5718796 0.13249578
## 10.0 1.00 0.5919035 0.17516474
## 10.0 1.25 0.5985064 0.18909381
##
## Accuracy was used to select the optimal model using the largest value.
## The final values used for the model were sigma = 0.1 and C = 1.25.
# Per-fold Accuracy and Kappa stored on the Node1 fit
Adult_TDA_PC_5.60.5_n1_SvmFit0$resample
## Accuracy Kappa Resample
## 1 0.7113025 0.4221246 Fold1
## 2 0.7047804 0.4090989 Fold3
## 3 0.7024117 0.4039877 Fold2
# Keep the per-fold Accuracy column (first column of the resample table) as a
# one-column data frame for the fold-wise comparison.
ad_tda_pc_5.60.5_n1_svm_fit_re <-
  Adult_TDA_PC_5.60.5_n1_SvmFit0$resample["Accuracy"]
summary(Adult_TDA_PC_5.60.5_n1_SvmFit0)
## Length Class Mode
## 1 ksvm S4
#vip(Adult_TDA_PC_5.60.5_n1_SvmFit0,25) + ggtitle("Adult_TDA_PCA_5.60.5_n1_SvmFit TDA-Assited Svm")
# Score the held-out test data with the Node1 SVM
pred0 <- predict(Adult_TDA_PC_5.60.5_n1_SvmFit0, newdata = adult.one_hot_df4Test)
# Confusion matrix: predicted labels against the observed test labels
ad_tda_pc_5.60.5_n1_svm_cf0 <- confusionMatrix(
  data = pred0,
  reference = as.factor(adult.one_hot_df4Test$adult_df1)
)
ad_tda_pc_5.60.5_n1_svm_cf0
## Confusion Matrix and Statistics
##
## Reference
## Prediction <=50K >50K
## <=50K 1770 436
## >50K 5646 1916
##
## Accuracy : 0.3774
## 95% CI : (0.3677, 0.3871)
## No Information Rate : 0.7592
## P-Value [Acc > NIR] : 1
##
## Kappa : 0.0303
##
## Mcnemar's Test P-Value : <2e-16
##
## Sensitivity : 0.2387
## Specificity : 0.8146
## Pos Pred Value : 0.8024
## Neg Pred Value : 0.2534
## Prevalence : 0.7592
## Detection Rate : 0.1812
## Detection Prevalence : 0.2258
## Balanced Accuracy : 0.5266
##
## 'Positive' Class : <=50K
##
# NOTE(review): this prints the same confusion matrix a second time (it was
# already printed right after being created); the repeat looks redundant.
ad_tda_pc_5.60.5_n1_svm_cf0
## Confusion Matrix and Statistics
##
## Reference
## Prediction <=50K >50K
## <=50K 1770 436
## >50K 5646 1916
##
## Accuracy : 0.3774
## 95% CI : (0.3677, 0.3871)
## No Information Rate : 0.7592
## P-Value [Acc > NIR] : 1
##
## Kappa : 0.0303
##
## Mcnemar's Test P-Value : <2e-16
##
## Sensitivity : 0.2387
## Specificity : 0.8146
## Pos Pred Value : 0.8024
## Neg Pred Value : 0.2534
## Prevalence : 0.7592
## Detection Rate : 0.1812
## Detection Prevalence : 0.2258
## Balanced Accuracy : 0.5266
##
## 'Positive' Class : <=50K
##
# Overall statistics of the Node1 confusion matrix (Accuracy, Kappa, ...)
ad_tda_pc_5.60.5_n1_svm_cf0$overall
## Accuracy Kappa AccuracyLower AccuracyUpper AccuracyNull
## 0.37735463 0.03034731 0.36772860 0.38705383 0.75921376
## AccuracyPValue McnemarPValue
## 1.00000000 0.00000000
# Store the overall test accuracy (first entry of $overall), then show the
# per-class metrics.
ad_tda_pc_5.60.5_n1_svm_cf0_ov_acc <-
  ad_tda_pc_5.60.5_n1_svm_cf0$overall["Accuracy"]
ad_tda_pc_5.60.5_n1_svm_cf0$byClass
## Sensitivity Specificity Pos Pred Value
## 0.2386731 0.8146259 0.8023572
## Neg Pred Value Precision Recall
## 0.2533721 0.8023572 0.2386731
## F1 Prevalence Detection Rate
## 0.3679069 0.7592138 0.1812039
## Detection Prevalence Balanced Accuracy
## 0.2258395 0.5266495
# Precision, Recall and F1 (entries 5 to 7 of $byClass)
ad_tda_pc_5.60.5_n1_svm_cf0_pre_rec_f1 <-
  ad_tda_pc_5.60.5_n1_svm_cf0$byClass[c("Precision", "Recall", "F1")]
###### Conduct initial Bayesian tests of non-tda-assisted SVM vs. tda-assisted SVM classifiers
### 3-fold diff
# Per-fold accuracy of the baseline SVM minus per-fold accuracy of the Node1
# TDA-assisted SVM.
# NOTE(review): the two tables are subtracted row by row; both resample tables
# print in the same Fold1/Fold3/Fold2 order in this run -- confirm that the
# row order always matches.
diff_tda_pca_5.60.5_svm_n1_3_fold <-
  ad_svm_fit_re - ad_tda_pc_5.60.5_n1_svm_fit_re
diff_tda_pca_5.60.5_svm_n1_3_fold
## Accuracy
## 1 0.1129703
## 2 0.1175413
## 3 0.1210945
## Bayesian tests on the 3-fold differences
# Bayesian sign test; -0.01 and 0.01 are presumably the ROPE bounds (they match
# the range passed to rope() further down).
bst_tda_pca_5.60.5_svm.n1_3_fold <- BayesianSignTest(
  as.matrix(diff_tda_pca_5.60.5_svm_n1_3_fold),
  -0.01,
  0.01
)
bst_tda_pca_5.60.5_svm.n1_3_fold
## $probLeft
## [1] 0
##
## $probRope
## [1] 0.25
##
## $probRight
## [1] 0.75
# Odds "left": probLeft divided by probRight from the sign test above
bst_tda_pca_5.60.5_svm.n1_3_fold_odds.left <-
  bst_tda_pca_5.60.5_svm.n1_3_fold[["probLeft"]] /
  bst_tda_pca_5.60.5_svm.n1_3_fold[["probRight"]]
bst_tda_pca_5.60.5_svm.n1_3_fold_odds.left
## [1] 0
# Bayesian signed-rank test on the 3-fold differences (same -0.01 / 0.01
# bounds as the sign test).
bsr_tda_pca_5.60.5_svm.n1_3_fold <- BayesianSignedRank(
  as.matrix(diff_tda_pca_5.60.5_svm_n1_3_fold),
  -0.01,
  0.01
)
bsr_tda_pca_5.60.5_svm.n1_3_fold
## $winLeft
## [1] 0
##
## $winRope
## [1] 0.0086
##
## $winRight
## [1] 0.9914
# Correlated Bayesian t-test on the 3-fold accuracy differences.
# NOTE(review): the trailing arguments are presumably (rho = 0.1, ROPE lower =
# -0.01, ROPE upper = 0.01) -- confirm against correlatedBayesianTtest().
bct_tda_pca_5.60.5_svm.n1_3_fold <- correlatedBayesianTtest(
  as.matrix(diff_tda_pca_5.60.5_svm_n1_3_fold),
  0.1,
  -0.01,
  0.01
)
bct_tda_pca_5.60.5_svm.n1_3_fold
## $left
## [1] 0.000227652
##
## $rope
## [1] 9.277757e-05
##
## $right
## [1] 0.9996796
# ROPE plot of the 3-fold differences, using c(-0.01, 0.01) as the range
plot(
  rope(diff_tda_pca_5.60.5_svm_n1_3_fold, c(-0.01, 0.01))
)

# Bayes factor t-test (left disabled)
#bf_tda_pca_5.60.5_rf.n1_3_fold = ttestBF(x = as.matrix(diff_tda_pca_5.60.5_svm_n1_3_fold))
#bf_tda_pca_5.60.5_rf.n1_3_fold
# One-sample t-test of the 3-fold differences (default null: mean equal to 0)
t.test(
  as.matrix(diff_tda_pca_5.60.5_svm_n1_3_fold)
)
##
## One Sample t-test
##
## data: as.matrix(diff_tda_pca_5.60.5_svm_n1_3_fold)
## t = 49.844, df = 2, p-value = 0.0004023
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
## 0.1070848 0.1273192
## sample estimates:
## mean of x
## 0.117202
### Test set diff
# Overall test accuracy of the baseline SVM minus that of the Node1
# TDA-assisted SVM (a single number).
diff_tda_pca_5.60.5_svm.n1_test <-
  svm_cf_ov_acc - ad_tda_pc_5.60.5_n1_svm_cf0_ov_acc
diff_tda_pca_5.60.5_svm.n1_test
## Accuracy
## 0.4496314
## Bayesian tests on the test-set difference
# Bayesian sign test; -0.01 and 0.01 are presumably the ROPE bounds.
bst_tda_pca_5.60.5_svm.n1_test <- BayesianSignTest(
  as.matrix(diff_tda_pca_5.60.5_svm.n1_test),
  -0.01,
  0.01
)
bst_tda_pca_5.60.5_svm.n1_test
## $probLeft
## [1] 0
##
## $probRope
## [1] 0.5
##
## $probRight
## [1] 0.5
# Odds "left": probLeft divided by probRight from the sign test above
bst_tda_pca_5.60.5_svm.n1_test_odds.left <-
  bst_tda_pca_5.60.5_svm.n1_test[["probLeft"]] /
  bst_tda_pca_5.60.5_svm.n1_test[["probRight"]]
bst_tda_pca_5.60.5_svm.n1_test_odds.left
## [1] 0
# Bayesian signed-rank test on the test-set difference (same -0.01 / 0.01
# bounds as the sign test).
bsr_tda_pca_5.60.5_svm.n1_test <- BayesianSignedRank(
  as.matrix(diff_tda_pca_5.60.5_svm.n1_test),
  -0.01,
  0.01
)
bsr_tda_pca_5.60.5_svm.n1_test
## $winLeft
## [1] 0
##
## $winRope
## [1] 0.1624
##
## $winRight
## [1] 0.8376
# Bayesian Correlated Test
# Runs the correlated Bayesian t-test on the single test-set difference.
# NOTE(review): the recorded result is NA for left/rope/right. Presumably a
# spread cannot be estimated from one observation -- confirm whether this test
# should be run on the test-set difference at all.
bct_tda_pca_5.60.5_svm.n1_test<-correlatedBayesianTtest(as.matrix(diff_tda_pca_5.60.5_svm.n1_test),0.1,-0.01,0.01)
bct_tda_pca_5.60.5_svm.n1_test
## $left
## [1] NA
##
## $rope
## [1] NA
##
## $right
## [1] NA
# Rope Plot
#plot(rope(diff_tda_pca_5.60.5_svm.n1_test))
#BayesFactor
#bf_tda_pca_5.60.5_svm.n1_test = ttestBF(x = as.matrix(diff_tda_pca_5.60.5_svm.n1_test))
#bf_tda_pca_5.60.5_svm.n1_test
#t_test
#t.test(as.matrix(diff_tda_pca_5.60.5_svm.n1_test))
##With TDA PCA filter 5 intervals, 50% overlap, 5 bins
##Node2
# Fit a radial-kernel SVM (caret method "svmRadial") on the n2 data set,
# centering and scaling the predictors and choosing sigma/C from svmGrid by
# resampled Accuracy. The resampling scheme comes from fitControl, which is
# defined elsewhere in the file.
# Changes from the earlier form: TRUE instead of the reassignable shorthand T,
# and `preProcess` spelled out instead of relying on partial argument matching.
# NOTE(review): `Importance` is forwarded through `...` to the underlying
# fitter; it is not obvious that the SVM backend uses it — confirm or drop.
# NOTE(review): the run log reports zero-variance dummy columns that cannot be
# scaled; adding "zv" to preProcess would drop them but changes the predictor
# set, so it is left as is here.
Adult_TDA_PC_5.60.5_n2_SvmFit0 <- train(
  as.factor(adult_df1) ~ .,
  data = tda.m_adult_5.60.5.n2.vec,
  Importance = TRUE,
  method = "svmRadial",
  trControl = fitControl,
  tuneGrid = svmGrid,
  preProcess = c("center", "scale"),
  metric = "Accuracy"
)
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.Preschool,
## V7.Priv.house.serv, V14.Holand.Netherlands
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.Preschool,
## V7.Priv.house.serv, V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.Preschool,
## V7.Priv.house.serv, V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.Preschool,
## V7.Priv.house.serv, V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.Preschool,
## V7.Priv.house.serv, V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.Preschool,
## V7.Priv.house.serv, V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.Preschool,
## V7.Priv.house.serv, V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.Preschool,
## V7.Priv.house.serv, V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.Preschool,
## V7.Priv.house.serv, V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.Preschool,
## V7.Priv.house.serv, V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.Preschool,
## V7.Priv.house.serv, V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.Preschool,
## V7.Priv.house.serv, V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.Preschool,
## V7.Priv.house.serv, V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.Preschool,
## V7.Priv.house.serv, V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.Preschool,
## V7.Priv.house.serv, V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands, V14.Honduras, V14.Outlying.US.Guam.USVI.etc.
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands, V14.Honduras, V14.Outlying.US.Guam.USVI.etc.
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands, V14.Honduras, V14.Outlying.US.Guam.USVI.etc.
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands, V14.Honduras, V14.Outlying.US.Guam.USVI.etc.
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands, V14.Honduras, V14.Outlying.US.Guam.USVI.etc.
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands, V14.Honduras, V14.Outlying.US.Guam.USVI.etc.
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands, V14.Honduras, V14.Outlying.US.Guam.USVI.etc.
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands, V14.Honduras, V14.Outlying.US.Guam.USVI.etc.
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands, V14.Honduras, V14.Outlying.US.Guam.USVI.etc.
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands, V14.Honduras, V14.Outlying.US.Guam.USVI.etc.
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands, V14.Honduras, V14.Outlying.US.Guam.USVI.etc.
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands, V14.Honduras, V14.Outlying.US.Guam.USVI.etc.
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands, V14.Honduras, V14.Outlying.US.Guam.USVI.etc.
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands, V14.Honduras, V14.Outlying.US.Guam.USVI.etc.
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands, V14.Honduras, V14.Outlying.US.Guam.USVI.etc.
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
# Show the tuning grid results and the selected sigma/C for the n2 fit
print(Adult_TDA_PC_5.60.5_n2_SvmFit0)
## Support Vector Machines with Radial Basis Function Kernel
##
## 13933 samples
## 108 predictor
## 2 classes: ' <=50K', ' >50K'
##
## Pre-processing: centered (108), scaled (108)
## Resampling: Cross-Validated (3 fold)
## Summary of sample sizes: 9290, 9288, 9288
## Resampling results across tuning parameters:
##
## sigma C Accuracy Kappa
## 0.1 0.25 0.6891556 0.37591079
## 0.1 0.50 0.7011412 0.40072167
## 0.1 0.75 0.7058782 0.41058935
## 0.1 1.00 0.7088213 0.41682332
## 0.1 1.25 0.7093239 0.41798372
## 1.0 0.25 0.6184620 0.22902828
## 1.0 0.50 0.6447297 0.28393248
## 1.0 0.75 0.6573611 0.31064501
## 1.0 1.00 0.6647531 0.32642489
## 1.0 1.25 0.6632457 0.32368642
## 10.0 0.25 0.5244385 0.03227667
## 10.0 0.50 0.5483392 0.08266416
## 10.0 0.75 0.5705883 0.12985391
## 10.0 1.00 0.5898947 0.17099505
## 10.0 1.25 0.5962830 0.18448001
##
## Accuracy was used to select the optimal model using the largest value.
## The final values used for the model were sigma = 0.1 and C = 1.25.
# Per-fold resampling metrics of the selected n2 model
Adult_TDA_PC_5.60.5_n2_SvmFit0$resample
## Accuracy Kappa Resample
## 1 0.7141934 0.4282358 Fold1
## 2 0.7063509 0.4118004 Fold3
## 3 0.7074273 0.4139150 Fold2
# Keep the per-fold Accuracy as a one-column data frame, ordered by fold label.
# The rows of $resample are not in fold order (Fold1, Fold3, Fold2 in the
# recorded output), and the 3-fold difference computed later subtracts data
# frames row by row, so an unsorted table pairs mismatched folds.
# NOTE(review): assumes the baseline ad_svm_fit_re is also in Fold1..FoldK
# order — confirm where it is built.
ad_tda_pc_5.60.5_n2_svm_fit_re <- local({
  fold_metrics <- Adult_TDA_PC_5.60.5_n2_SvmFit0$resample
  fold_metrics <- fold_metrics[order(fold_metrics$Resample), "Accuracy", drop = FALSE]
  rownames(fold_metrics) <- NULL
  fold_metrics
})
summary(Adult_TDA_PC_5.60.5_n2_SvmFit0)
## Length Class Mode
## 1 ksvm S4
#vip(Adult_TDA_PC_5.60.5_n2_SvmFit0,25) + ggtitle("Adult_TDA_PCA_5.60.5_n2_SvmFit TDA-Assited Svm")
# Score the held-out test rows with the n2 SVM fit
pred0 <- predict(Adult_TDA_PC_5.60.5_n2_SvmFit0, newdata = adult.one_hot_df4Test)
# Cross-tabulate predicted against observed class labels on the test set
ad_tda_pc_5.60.5_n2_svm_cf0 <- confusionMatrix(
  data = pred0,
  reference = as.factor(adult.one_hot_df4Test[["adult_df1"]])
)
print(ad_tda_pc_5.60.5_n2_svm_cf0)
## Confusion Matrix and Statistics
##
## Reference
## Prediction <=50K >50K
## <=50K 1770 436
## >50K 5646 1916
##
## Accuracy : 0.3774
## 95% CI : (0.3677, 0.3871)
## No Information Rate : 0.7592
## P-Value [Acc > NIR] : 1
##
## Kappa : 0.0303
##
## Mcnemar's Test P-Value : <2e-16
##
## Sensitivity : 0.2387
## Specificity : 0.8146
## Pos Pred Value : 0.8024
## Neg Pred Value : 0.2534
## Prevalence : 0.7592
## Detection Rate : 0.1812
## Detection Prevalence : 0.2258
## Balanced Accuracy : 0.5266
##
## 'Positive' Class : <=50K
##
# NOTE(review): second print of the same n2 confusion matrix object
print(ad_tda_pc_5.60.5_n2_svm_cf0)
## Confusion Matrix and Statistics
##
## Reference
## Prediction <=50K >50K
## <=50K 1770 436
## >50K 5646 1916
##
## Accuracy : 0.3774
## 95% CI : (0.3677, 0.3871)
## No Information Rate : 0.7592
## P-Value [Acc > NIR] : 1
##
## Kappa : 0.0303
##
## Mcnemar's Test P-Value : <2e-16
##
## Sensitivity : 0.2387
## Specificity : 0.8146
## Pos Pred Value : 0.8024
## Neg Pred Value : 0.2534
## Prevalence : 0.7592
## Detection Rate : 0.1812
## Detection Prevalence : 0.2258
## Balanced Accuracy : 0.5266
##
## 'Positive' Class : <=50K
##
# Overall test-set statistics of the n2 confusion matrix
print(ad_tda_pc_5.60.5_n2_svm_cf0$overall)
## Accuracy Kappa AccuracyLower AccuracyUpper AccuracyNull
## 0.37735463 0.03034731 0.36772860 0.38705383 0.75921376
## AccuracyPValue McnemarPValue
## 1.00000000 0.00000000
# Keep only the Accuracy entry (the first element) for the test-set comparison
ad_tda_pc_5.60.5_n2_svm_cf0_ov_acc <- ad_tda_pc_5.60.5_n2_svm_cf0$overall["Accuracy"]
# Per-class test-set statistics of the n2 confusion matrix
print(ad_tda_pc_5.60.5_n2_svm_cf0$byClass)
## Sensitivity Specificity Pos Pred Value
## 0.2386731 0.8146259 0.8023572
## Neg Pred Value Precision Recall
## 0.2533721 0.8023572 0.2386731
## F1 Prevalence Detection Rate
## 0.3679069 0.7592138 0.1812039
## Detection Prevalence Balanced Accuracy
## 0.2258395 0.5266495
# Precision, Recall and F1 are elements 5 to 7 of byClass; select them by name
ad_tda_pc_5.60.5_n2_svm_cf0_pre_rec_f1 <-
  ad_tda_pc_5.60.5_n2_svm_cf0$byClass[c("Precision", "Recall", "F1")]
###### Conduct initial Bayesian tests of non-tda-assisted SVM vs. tda-assisted SVM classifiers
### 3-fold diff
# Row-by-row difference of per-fold accuracy: baseline SVM minus the n2 SVM
diff_tda_pca_5.60.5_svm_n2_3_fold <- ad_svm_fit_re - ad_tda_pc_5.60.5_n2_svm_fit_re
diff_tda_pca_5.60.5_svm_n2_3_fold
## Accuracy
## 1 0.1100793
## 2 0.1159707
## 3 0.1160788
## Bayesian Tests 3-fold diff
# Bayesian Sign Test
# Sign test on the n2 per-fold differences; -0.01 and 0.01 are presumably the
# ROPE limits (the helper is defined elsewhere in the file)
bst_tda_pca_5.60.5_svm.n2_3_fold <- BayesianSignTest(
  as.matrix(diff_tda_pca_5.60.5_svm_n2_3_fold),
  -0.01,
  0.01
)
print(bst_tda_pca_5.60.5_svm.n2_3_fold)
## $probLeft
## [1] 0
##
## $probRope
## [1] 0.25
##
## $probRight
## [1] 0.75
# Odds Left Bayesian Sign Test
# Ratio of the sign test's left probability to its right probability
bst_tda_pca_5.60.5_svm.n2_3_fold_odds.left <- with(
  bst_tda_pca_5.60.5_svm.n2_3_fold,
  probLeft / probRight
)
print(bst_tda_pca_5.60.5_svm.n2_3_fold_odds.left)
## [1] 0
# Bayesian Signed Rank Test
# Signed-rank test on the n2 per-fold differences, same (-0.01, 0.01) bounds
bsr_tda_pca_5.60.5_svm.n2_3_fold <- BayesianSignedRank(
  as.matrix(diff_tda_pca_5.60.5_svm_n2_3_fold),
  -0.01,
  0.01
)
print(bsr_tda_pca_5.60.5_svm.n2_3_fold)
## $winLeft
## [1] 0
##
## $winRope
## [1] 0.008266667
##
## $winRight
## [1] 0.9917333
# Bayesian Correlated Test
# Correlated Bayesian t-test on the n2 per-fold accuracy differences.
# The correlation between folds (second argument) should equal the test
# fraction of the resampling scheme, i.e. 1/k for k-fold CV. The n2 model was
# resampled with 3-fold CV, so 1/3 is used instead of 0.1 (the 10-fold value).
# NOTE(review): correlatedBayesianTtest is defined outside this section; the
# argument order is assumed to be (differences, rho, rope_min, rope_max).
bct_tda_pca_5.60.5_svm.n2_3_fold <- correlatedBayesianTtest(
  as.matrix(diff_tda_pca_5.60.5_svm_n2_3_fold), 1 / 3, -0.01, 0.01
)
bct_tda_pca_5.60.5_svm.n2_3_fold
## $left
## [1] 0.0001701296
##
## $rope
## [1] 7.1642e-05
##
## $right
## [1] 0.9997582
# Rope Plot
# ROPE summary of the n2 per-fold differences, plotted for the (-0.01, 0.01) range
plot(rope(diff_tda_pca_5.60.5_svm_n2_3_fold, range = c(-0.01, 0.01)))

#BayesFactor
#bf_tda_pca_5.60.5_rf.n2_3_fold = ttestBF(x = as.matrix(diff_tda_pca_5.60.5_svm_n2_3_fold))
#bf_tda_pca_5.60.5_rf.n2_3_fold
# Frequentist one-sample t-test of the same differences against a mean of 0
t.test(as.matrix(diff_tda_pca_5.60.5_svm_n2_3_fold), mu = 0)
##
## One Sample t-test
##
## data: as.matrix(diff_tda_pca_5.60.5_svm_n2_3_fold)
## t = 57.537, df = 2, p-value = 0.0003019
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
## 0.1055148 0.1225711
## sample estimates:
## mean of x
## 0.114043
### Test set diff
# Test-set accuracy gap: baseline SVM accuracy minus the n2 SVM accuracy
diff_tda_pca_5.60.5_svm.n2_test <- svm_cf_ov_acc - ad_tda_pc_5.60.5_n2_svm_cf0_ov_acc
print(diff_tda_pca_5.60.5_svm.n2_test)
## Accuracy
## 0.4496314
## Bayesian Tests Test set diff
# Bayesian Sign Test
# Sign test on the single n2 test-set difference; -0.01 and 0.01 are presumably
# the ROPE limits (the helper is defined elsewhere in the file)
bst_tda_pca_5.60.5_svm.n2_test <- BayesianSignTest(
  as.matrix(diff_tda_pca_5.60.5_svm.n2_test),
  -0.01,
  0.01
)
print(bst_tda_pca_5.60.5_svm.n2_test)
## $probLeft
## [1] 0
##
## $probRope
## [1] 0.5
##
## $probRight
## [1] 0.5
# Odds Left Bayesian Sign Test
# Ratio of the sign test's left probability to its right probability
bst_tda_pca_5.60.5_svm.n2_test_odds.left <- with(
  bst_tda_pca_5.60.5_svm.n2_test,
  probLeft / probRight
)
print(bst_tda_pca_5.60.5_svm.n2_test_odds.left)
## [1] 0
# Bayesian Signed Rank Test
# Signed-rank test on the single n2 test-set difference, same (-0.01, 0.01) bounds
bsr_tda_pca_5.60.5_svm.n2_test <- BayesianSignedRank(
  as.matrix(diff_tda_pca_5.60.5_svm.n2_test),
  -0.01,
  0.01
)
print(bsr_tda_pca_5.60.5_svm.n2_test)
## $winLeft
## [1] 0
##
## $winRope
## [1] 0.1609333
##
## $winRight
## [1] 0.8390667
# Bayesian Correlated Test
# Correlated t-test applied to the single n2 test-set difference.
# NOTE(review): the recorded result is NA for left/rope/right — presumably
# because one observation has no sample standard deviation; confirm whether
# this call is meaningful for a single value.
bct_tda_pca_5.60.5_svm.n2_test <- correlatedBayesianTtest(
  as.matrix(diff_tda_pca_5.60.5_svm.n2_test),
  0.1,
  -0.01,
  0.01
)
print(bct_tda_pca_5.60.5_svm.n2_test)
## $left
## [1] NA
##
## $rope
## [1] NA
##
## $right
## [1] NA
# Rope Plot
#plot(rope(diff_tda_pca_5.60.5_svm.n2_test)))
#BayesFactor
#bf_tda_pca_5.60.5_svm.n2_test = ttestBF(x = as.matrix(diff_tda_pca_5.60.5_svm.n2_test)) #bf_tda_pca_5.60.5_svm.n2_test
#t_test
#t.test(as.matrix(diff_tda_pca_5.60.5_svm.n2_test))
##Node3
# Fit a radial-kernel SVM (caret method "svmRadial") on the n3 data set,
# centering and scaling the predictors and choosing sigma/C from svmGrid by
# resampled Accuracy. The resampling scheme comes from fitControl, which is
# defined elsewhere in the file.
# Changes from the earlier form: TRUE instead of the reassignable shorthand T,
# and `preProcess` spelled out instead of relying on partial argument matching.
# NOTE(review): `Importance` is forwarded through `...` to the underlying
# fitter; it is not obvious that the SVM backend uses it — confirm or drop.
# NOTE(review): the run log reports zero-variance dummy columns that cannot be
# scaled; adding "zv" to preProcess would drop them but changes the predictor
# set, so it is left as is here.
Adult_TDA_PC_5.60.5_n3_SvmFit0 <- train(
  as.factor(adult_df1) ~ .,
  data = tda.m_adult_5.60.5.n3.vec,
  Importance = TRUE,
  method = "svmRadial",
  trControl = fitControl,
  tuneGrid = svmGrid,
  preProcess = c("center", "scale"),
  metric = "Accuracy"
)
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands, V14.Honduras
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands, V14.Honduras
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands, V14.Honduras
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands, V14.Honduras
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands, V14.Honduras
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands, V14.Honduras
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands, V14.Honduras
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands, V14.Honduras
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands, V14.Honduras
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands, V14.Honduras
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands, V14.Honduras
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands, V14.Honduras
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands, V14.Honduras
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands, V14.Honduras
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands, V14.Honduras
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
# Show the tuning grid results and the selected sigma/C for the n3 fit
print(Adult_TDA_PC_5.60.5_n3_SvmFit0)
## Support Vector Machines with Radial Basis Function Kernel
##
## 15744 samples
## 108 predictor
## 2 classes: ' <=50K', ' >50K'
##
## Pre-processing: centered (108), scaled (108)
## Resampling: Cross-Validated (3 fold)
## Summary of sample sizes: 10496, 10497, 10495
## Resampling results across tuning parameters:
##
## sigma C Accuracy Kappa
## 0.1 0.25 0.7545733 0.1000473290
## 0.1 0.50 0.7630845 0.1739094144
## 0.1 0.75 0.7665781 0.2100243327
## 0.1 1.00 0.7670228 0.2286455925
## 0.1 1.25 0.7665782 0.2380677939
## 1.0 0.25 0.7458714 0.0187116472
## 1.0 0.50 0.7506985 0.0777461201
## 1.0 0.75 0.7520324 0.1087844313
## 1.0 1.00 0.7536838 0.1398155746
## 1.0 1.25 0.7516515 0.1536489238
## 10.0 0.25 0.7440930 -0.0001269622
## 10.0 0.50 0.7440930 0.0073361909
## 10.0 0.75 0.7440295 0.0241120566
## 10.0 1.00 0.7419969 0.0366308310
## 10.0 1.25 0.7399642 0.0451915066
##
## Accuracy was used to select the optimal model using the largest value.
## The final values used for the model were sigma = 0.1 and C = 1.
# Per-fold resampling metrics of the selected n3 model
Adult_TDA_PC_5.60.5_n3_SvmFit0$resample
## Accuracy Kappa Resample
## 1 0.7682927 0.2349959 Fold1
## 2 0.7699638 0.2407153 Fold2
## 3 0.7628120 0.2102256 Fold3
# Keep the per-fold Accuracy as a one-column data frame, ordered by fold label.
# The row order of $resample can vary between fits, and per-fold differences
# are computed by subtracting data frames row by row, so the folds are sorted
# explicitly rather than relying on the order they happen to come back in.
# NOTE(review): assumes the baseline ad_svm_fit_re is also in Fold1..FoldK
# order — confirm where it is built.
ad_tda_pc_5.60.5_n3_svm_fit_re <- local({
  fold_metrics <- Adult_TDA_PC_5.60.5_n3_SvmFit0$resample
  fold_metrics <- fold_metrics[order(fold_metrics$Resample), "Accuracy", drop = FALSE]
  rownames(fold_metrics) <- NULL
  fold_metrics
})
summary(Adult_TDA_PC_5.60.5_n3_SvmFit0)
## Length Class Mode
## 1 ksvm S4
#vip(Adult_TDA_PC_5.60.5_n3_SvmFit0,25) + ggtitle("Adult_TDA_PCA_5.60.5_n3_SvmFit TDA-Assisted Svm")
# Score the testing data with the node-3 model.
pred0 <- predict(
  Adult_TDA_PC_5.60.5_n3_SvmFit0,
  newdata = adult.one_hot_df4Test
)
# Cross-tabulate the predictions against the observed test labels.
ad_tda_pc_5.60.5_n3_svm_cf0 <- confusionMatrix(
  data = pred0,
  reference = as.factor(adult.one_hot_df4Test$adult_df1)
)
ad_tda_pc_5.60.5_n3_svm_cf0
## Confusion Matrix and Statistics
##
## Reference
## Prediction <=50K >50K
## <=50K 6786 1657
## >50K 630 695
##
## Accuracy : 0.7659
## 95% CI : (0.7573, 0.7742)
## No Information Rate : 0.7592
## P-Value [Acc > NIR] : 0.06312
##
## Kappa : 0.2474
##
## Mcnemar's Test P-Value : < 2e-16
##
## Sensitivity : 0.9150
## Specificity : 0.2955
## Pos Pred Value : 0.8037
## Neg Pred Value : 0.5245
## Prevalence : 0.7592
## Detection Rate : 0.6947
## Detection Prevalence : 0.8644
## Balanced Accuracy : 0.6053
##
## 'Positive' Class : <=50K
##
# Second print of the same confusion matrix (output identical to the first print).
ad_tda_pc_5.60.5_n3_svm_cf0
## Confusion Matrix and Statistics
##
## Reference
## Prediction <=50K >50K
## <=50K 6786 1657
## >50K 630 695
##
## Accuracy : 0.7659
## 95% CI : (0.7573, 0.7742)
## No Information Rate : 0.7592
## P-Value [Acc > NIR] : 0.06312
##
## Kappa : 0.2474
##
## Mcnemar's Test P-Value : < 2e-16
##
## Sensitivity : 0.9150
## Specificity : 0.2955
## Pos Pred Value : 0.8037
## Neg Pred Value : 0.5245
## Prevalence : 0.7592
## Detection Rate : 0.6947
## Detection Prevalence : 0.8644
## Balanced Accuracy : 0.6053
##
## 'Positive' Class : <=50K
##
# Overall statistics as a named numeric vector (Accuracy, Kappa, CI bounds, p-values).
ad_tda_pc_5.60.5_n3_svm_cf0$overall
## Accuracy Kappa AccuracyLower AccuracyUpper AccuracyNull
## 7.658681e-01 2.474291e-01 7.573408e-01 7.742366e-01 7.592138e-01
## AccuracyPValue McnemarPValue
## 6.311549e-02 4.163612e-102
# Test-set accuracy, selected by name instead of position 1.
ad_tda_pc_5.60.5_n3_svm_cf0_ov_acc <- ad_tda_pc_5.60.5_n3_svm_cf0$overall["Accuracy"]
# Per-class statistics (sensitivity, specificity, precision, recall, F1, ...).
ad_tda_pc_5.60.5_n3_svm_cf0$byClass
## Sensitivity Specificity Pos Pred Value
## 0.9150485 0.2954932 0.8037427
## Neg Pred Value Precision Recall
## 0.5245283 0.8037427 0.9150485
## F1 Prevalence Detection Rate
## 0.8557917 0.7592138 0.6947174
## Detection Prevalence Balanced Accuracy
## 0.8643530 0.6052709
# Precision, recall and F1, selected by name instead of positions 5:7.
ad_tda_pc_5.60.5_n3_svm_cf0_pre_rec_f1 <-
  ad_tda_pc_5.60.5_n3_svm_cf0$byClass[c("Precision", "Recall", "F1")]
###### Conduct initial Bayesian tests of non-tda-assisted SVM vs. tda-assisted SVM classifiers
### 3-fold diff
# Row-wise accuracy difference: ad_svm_fit_re (defined earlier in the file;
# presumably the non-TDA SVM folds) minus the node-3 TDA-assisted SVM folds.
diff_tda_pca_5.60.5_svm_n3_3_fold <-
  ad_svm_fit_re - ad_tda_pc_5.60.5_n3_svm_fit_re
diff_tda_pca_5.60.5_svm_n3_3_fold
## Accuracy
## 1 0.05598006
## 2 0.05235787
## 3 0.06069422
## Bayesian Tests 3-fold diff
# Bayesian Sign Test (the two numeric bounds are presumably the ROPE limits)
bst_tda_pca_5.60.5_svm.n3_3_fold <- BayesianSignTest(
  as.matrix(diff_tda_pca_5.60.5_svm_n3_3_fold),
  -0.01,
  0.01
)
bst_tda_pca_5.60.5_svm.n3_3_fold
## $probLeft
## [1] 0
##
## $probRope
## [1] 0.25
##
## $probRight
## [1] 0.75
# Odds Left Bayesian Sign Test: probLeft / probRight (0 or Inf when one side is 0)
bst_tda_pca_5.60.5_svm.n3_3_fold_odds.left <- with(
  bst_tda_pca_5.60.5_svm.n3_3_fold,
  probLeft / probRight
)
bst_tda_pca_5.60.5_svm.n3_3_fold_odds.left
## [1] 0
# Bayesian Signed Rank Test on the fold differences, same bounds as the sign test
bsr_tda_pca_5.60.5_svm.n3_3_fold <- BayesianSignedRank(
  as.matrix(diff_tda_pca_5.60.5_svm_n3_3_fold),
  -0.01,
  0.01
)
bsr_tda_pca_5.60.5_svm.n3_3_fold
## $winLeft
## [1] 0
##
## $winRope
## [1] 0.009033333
##
## $winRight
## [1] 0.9909667
# Bayesian Correlated Test
# The second argument is the correlation (rho) between cross-validation
# results, which the correlated Bayesian t-test sets to 1 / (number of folds).
# The models here are resampled with 3-fold CV, so rho is 1/3; the previous
# value 0.1 corresponds to 10-fold CV.
# NOTE(review): assumes the signature correlatedBayesianTtest(diff, rho,
# rope_min, rope_max) — confirm against its definition. The output recorded
# below was produced with rho = 0.1; re-knit to refresh it.
bct_tda_pca_5.60.5_svm.n3_3_fold <- correlatedBayesianTtest(
  as.matrix(diff_tda_pca_5.60.5_svm_n3_3_fold),
  1 / 3,
  -0.01,
  0.01
)
bct_tda_pca_5.60.5_svm.n3_3_fold
## $left
## [1] 0.0008798415
##
## $rope
## [1] 0.0009182851
##
## $right
## [1] 0.9982019
# Rope Plot
plot(
  rope(diff_tda_pca_5.60.5_svm_n3_3_fold, range = c(-0.01, 0.01))
)

#BayesFactor
#bf_tda_pca_5.60.5_rf.n3_3_fold = ttestBF(x = as.matrix(diff_tda_pca_5.60.5_svm_n3_3_fold))
#bf_tda_pca_5.60.5_rf.n3_3_fold
#t_test
# One-sample t-test of the fold differences against a mean of zero.
t.test(x = as.matrix(diff_tda_pca_5.60.5_svm_n3_3_fold))
##
## One Sample t-test
##
## data: as.matrix(diff_tda_pca_5.60.5_svm_n3_3_fold)
## t = 23.347, df = 2, p-value = 0.00183
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
## 0.04596017 0.06672794
## sample estimates:
## mean of x
## 0.05634405
### Test set diff
# Single test-set accuracy difference: svm_cf_ov_acc (defined earlier in the
# file) minus the node-3 TDA-assisted SVM test accuracy.
diff_tda_pca_5.60.5_svm.n3_test <-
  svm_cf_ov_acc - ad_tda_pc_5.60.5_n3_svm_cf0_ov_acc
diff_tda_pca_5.60.5_svm.n3_test
## Accuracy
## 0.06111794
## Bayesian Tests Test set diff
# Bayesian Sign Test on the single test-set difference
bst_tda_pca_5.60.5_svm.n3_test <- BayesianSignTest(
  as.matrix(diff_tda_pca_5.60.5_svm.n3_test),
  -0.01,
  0.01
)
bst_tda_pca_5.60.5_svm.n3_test
## $probLeft
## [1] 0
##
## $probRope
## [1] 0.5
##
## $probRight
## [1] 0.5
# Odds Left Bayesian Sign Test: probLeft / probRight
bst_tda_pca_5.60.5_svm.n3_test_odds.left <- with(
  bst_tda_pca_5.60.5_svm.n3_test,
  probLeft / probRight
)
bst_tda_pca_5.60.5_svm.n3_test_odds.left
## [1] 0
# Bayesian Signed Rank Test on the single test-set difference
bsr_tda_pca_5.60.5_svm.n3_test <- BayesianSignedRank(
  as.matrix(diff_tda_pca_5.60.5_svm.n3_test),
  -0.01,
  0.01
)
bsr_tda_pca_5.60.5_svm.n3_test
## $winLeft
## [1] 0
##
## $winRope
## [1] 0.1582
##
## $winRight
## [1] 0.8418
# Bayesian Correlated Test
# NOTE(review): the input is a single test-set difference and the recorded
# result is NA for left/rope/right — presumably no spread can be estimated
# from one value. Confirm whether this call should stay.
bct_tda_pca_5.60.5_svm.n3_test <- correlatedBayesianTtest(
  as.matrix(diff_tda_pca_5.60.5_svm.n3_test),
  0.1,
  -0.01,
  0.01
)
bct_tda_pca_5.60.5_svm.n3_test
## $left
## [1] NA
##
## $rope
## [1] NA
##
## $right
## [1] NA
# Rope Plot
#plot(rope(diff_tda_pca_5.60.5_svm.n3_test)))
#BayesFactor
#bf_tda_pca_5.60.5_svm.n3_test = ttestBF(x = as.matrix(diff_tda_pca_5.60.5_svm.n3_test)) #bf_tda_pca_5.60.5_svm.n3_test
#t_test
#t.test(as.matrix(diff_tda_pca_5.60.5_svm.n3_test))
##Node4
# Tune an RBF-kernel SVM (caret method "svmRadial") on the node-4 data, using
# the shared fitControl resampling setup and svmGrid tuning grid, with
# predictors centered and scaled. Accuracy selects the final model.
# Changes: `T` -> `TRUE` (T can be reassigned) and the partially matched
# `preProc` is spelled out as `preProcess`.
# NOTE(review): `Importance` is not a named train() argument and is forwarded
# through `...`; confirm the underlying fit actually uses it.
# NOTE(review): the recorded warnings report a zero-variance predictor; adding
# "zv" to preProcess would drop it, but that would change the recorded results.
Adult_TDA_PC_5.60.5_n4_SvmFit0 <- train(
  as.factor(adult_df1) ~ .,
  data = tda.m_adult_5.60.5.n4.vec,
  Importance = TRUE,
  method = "svmRadial",
  trControl = fitControl,
  tuneGrid = svmGrid,
  preProcess = c("center", "scale"),
  metric = "Accuracy"
)
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
# Print the fitted caret model for node 4: resampling results over the tuning grid.
Adult_TDA_PC_5.60.5_n4_SvmFit0
## Support Vector Machines with Radial Basis Function Kernel
##
## 19829 samples
## 108 predictor
## 2 classes: ' <=50K', ' >50K'
##
## Pre-processing: centered (108), scaled (108)
## Resampling: Cross-Validated (3 fold)
## Summary of sample sizes: 13220, 13219, 13219
## Resampling results across tuning parameters:
##
## sigma C Accuracy Kappa
## 0.1 0.25 0.9351959 0.018364285
## 0.1 0.50 0.9364063 0.096513530
## 0.1 0.75 0.9360029 0.137415519
## 0.1 1.00 0.9354481 0.155378198
## 0.1 1.25 0.9354482 0.180673587
## 1.0 0.25 0.9351455 0.009329094
## 1.0 0.50 0.9353977 0.026900449
## 1.0 0.75 0.9360028 0.058350931
## 1.0 1.00 0.9361037 0.077788990
## 1.0 1.25 0.9355490 0.094435490
## 10.0 0.25 0.9351455 0.000000000
## 10.0 0.50 0.9351455 0.001346823
## 10.0 0.75 0.9354481 0.012684050
## 10.0 1.00 0.9352464 0.014884351
## 10.0 1.25 0.9348429 0.020516765
##
## Accuracy was used to select the optimal model using the largest value.
## The final values used for the model were sigma = 0.1 and C = 0.5.
# Per-fold Accuracy and Kappa for the selected tuning values.
Adult_TDA_PC_5.60.5_n4_SvmFit0$resample
## Accuracy Kappa Resample
## 1 0.9367625 0.09707067 Fold3
## 2 0.9366016 0.07198835 Fold1
## 3 0.9358548 0.12048157 Fold2
# Keep the per-fold accuracies as a one-column data frame. The column is
# selected by name rather than by position so a change in the column order of
# $resample cannot silently swap in Kappa.
# NOTE(review): rows keep the order of $resample (Fold3, Fold1, Fold2 in the
# recorded output), so the later row-wise subtraction pairs folds by position,
# not by fold label — confirm that is intended.
ad_tda_pc_5.60.5_n4_svm_fit_re <- Adult_TDA_PC_5.60.5_n4_SvmFit0$resample["Accuracy"]
# summary() on the train object only reports the wrapped ksvm fit.
summary(Adult_TDA_PC_5.60.5_n4_SvmFit0)
## Length Class Mode
## 1 ksvm S4
#vip(Adult_TDA_PC_5.60.5_n4_SvmFit0,25) + ggtitle("Adult_TDA_PCA_5.60.5_n4_SvmFit TDA-Assisted Svm")
# Score the testing data with the node-4 model.
pred0 <- predict(
  Adult_TDA_PC_5.60.5_n4_SvmFit0,
  newdata = adult.one_hot_df4Test
)
# Cross-tabulate the predictions against the observed test labels.
ad_tda_pc_5.60.5_n4_svm_cf0 <- confusionMatrix(
  data = pred0,
  reference = as.factor(adult.one_hot_df4Test$adult_df1)
)
ad_tda_pc_5.60.5_n4_svm_cf0
## Confusion Matrix and Statistics
##
## Reference
## Prediction <=50K >50K
## <=50K 7407 2296
## >50K 9 56
##
## Accuracy : 0.764
## 95% CI : (0.7555, 0.7724)
## No Information Rate : 0.7592
## P-Value [Acc > NIR] : 0.1355
##
## Kappa : 0.0338
##
## Mcnemar's Test P-Value : <2e-16
##
## Sensitivity : 0.99879
## Specificity : 0.02381
## Pos Pred Value : 0.76337
## Neg Pred Value : 0.86154
## Prevalence : 0.75921
## Detection Rate : 0.75829
## Detection Prevalence : 0.99335
## Balanced Accuracy : 0.51130
##
## 'Positive' Class : <=50K
##
# Second print of the same confusion matrix (output identical to the first print).
ad_tda_pc_5.60.5_n4_svm_cf0
## Confusion Matrix and Statistics
##
## Reference
## Prediction <=50K >50K
## <=50K 7407 2296
## >50K 9 56
##
## Accuracy : 0.764
## 95% CI : (0.7555, 0.7724)
## No Information Rate : 0.7592
## P-Value [Acc > NIR] : 0.1355
##
## Kappa : 0.0338
##
## Mcnemar's Test P-Value : <2e-16
##
## Sensitivity : 0.99879
## Specificity : 0.02381
## Pos Pred Value : 0.76337
## Neg Pred Value : 0.86154
## Prevalence : 0.75921
## Detection Rate : 0.75829
## Detection Prevalence : 0.99335
## Balanced Accuracy : 0.51130
##
## 'Positive' Class : <=50K
##
# Overall statistics as a named numeric vector (Accuracy, Kappa, CI bounds, p-values).
ad_tda_pc_5.60.5_n4_svm_cf0$overall
## Accuracy Kappa AccuracyLower AccuracyUpper AccuracyNull
## 0.76402539 0.03382565 0.75547581 0.77241727 0.75921376
## AccuracyPValue McnemarPValue
## 0.13548167 0.00000000
# Test-set accuracy, selected by name instead of position 1.
ad_tda_pc_5.60.5_n4_svm_cf0_ov_acc <- ad_tda_pc_5.60.5_n4_svm_cf0$overall["Accuracy"]
# Per-class statistics (sensitivity, specificity, precision, recall, F1, ...).
ad_tda_pc_5.60.5_n4_svm_cf0$byClass
## Sensitivity Specificity Pos Pred Value
## 0.99878641 0.02380952 0.76337215
## Neg Pred Value Precision Recall
## 0.86153846 0.76337215 0.99878641
## F1 Prevalence Detection Rate
## 0.86535428 0.75921376 0.75829238
## Detection Prevalence Balanced Accuracy
## 0.99334562 0.51129797
# Precision, recall and F1, selected by name instead of positions 5:7.
ad_tda_pc_5.60.5_n4_svm_cf0_pre_rec_f1 <-
  ad_tda_pc_5.60.5_n4_svm_cf0$byClass[c("Precision", "Recall", "F1")]
###### Conduct initial Bayesian tests of non-tda-assisted SVM vs. tda-assisted SVM classifiers
### 3-fold diff
# Row-wise accuracy difference: ad_svm_fit_re (defined earlier in the file;
# presumably the non-TDA SVM folds) minus the node-4 TDA-assisted SVM folds.
diff_tda_pca_5.60.5_svm_n4_3_fold <-
  ad_svm_fit_re - ad_tda_pc_5.60.5_n4_svm_fit_re
diff_tda_pca_5.60.5_svm_n4_3_fold
## Accuracy
## 1 -0.1124897
## 2 -0.1142799
## 3 -0.1123486
## Bayesian Tests 3-fold diff
# Bayesian Sign Test (the two numeric bounds are presumably the ROPE limits)
bst_tda_pca_5.60.5_svm.n4_3_fold <- BayesianSignTest(
  as.matrix(diff_tda_pca_5.60.5_svm_n4_3_fold),
  -0.01,
  0.01
)
bst_tda_pca_5.60.5_svm.n4_3_fold
## $probLeft
## [1] 0.75
##
## $probRope
## [1] 0.25
##
## $probRight
## [1] 0
# Odds Left Bayesian Sign Test: probLeft / probRight (0 or Inf when one side is 0)
bst_tda_pca_5.60.5_svm.n4_3_fold_odds.left <- with(
  bst_tda_pca_5.60.5_svm.n4_3_fold,
  probLeft / probRight
)
bst_tda_pca_5.60.5_svm.n4_3_fold_odds.left
## [1] Inf
# Bayesian Signed Rank Test on the fold differences, same bounds as the sign test
bsr_tda_pca_5.60.5_svm.n4_3_fold <- BayesianSignedRank(
  as.matrix(diff_tda_pca_5.60.5_svm_n4_3_fold),
  -0.01,
  0.01
)
bsr_tda_pca_5.60.5_svm.n4_3_fold
## $winLeft
## [1] 0.9913667
##
## $winRope
## [1] 0.008633333
##
## $winRight
## [1] 0
# Bayesian Correlated Test
# The second argument is the correlation (rho) between cross-validation
# results, which the correlated Bayesian t-test sets to 1 / (number of folds).
# The models here are resampled with 3-fold CV, so rho is 1/3; the previous
# value 0.1 corresponds to 10-fold CV.
# NOTE(review): assumes the signature correlatedBayesianTtest(diff, rho,
# rope_min, rope_max) — confirm against its definition. The output recorded
# below was produced with rho = 0.1; re-knit to refresh it.
bct_tda_pca_5.60.5_svm.n4_3_fold <- correlatedBayesianTtest(
  as.matrix(diff_tda_pca_5.60.5_svm_n4_3_fold),
  1 / 3,
  -0.01,
  0.01
)
bct_tda_pca_5.60.5_svm.n4_3_fold
## $left
## [1] 0.9999757
##
## $rope
## [1] 7.245483e-06
##
## $right
## [1] 1.701437e-05
# Rope Plot
plot(
  rope(diff_tda_pca_5.60.5_svm_n4_3_fold, range = c(-0.01, 0.01))
)

#BayesFactor
#bf_tda_pca_5.60.5_rf.n4_3_fold = ttestBF(x = as.matrix(diff_tda_pca_5.60.5_svm_n4_3_fold))
#bf_tda_pca_5.60.5_rf.n4_3_fold
#t_test
# One-sample t-test of the fold differences against a mean of zero.
t.test(x = as.matrix(diff_tda_pca_5.60.5_svm_n4_3_fold))
##
## One Sample t-test
##
## data: as.matrix(diff_tda_pca_5.60.5_svm_n4_3_fold)
## t = -181.85, df = 2, p-value = 3.024e-05
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
## -0.1157139 -0.1103649
## sample estimates:
## mean of x
## -0.1130394
### Test set diff
# Single test-set accuracy difference: svm_cf_ov_acc (defined earlier in the
# file) minus the node-4 TDA-assisted SVM test accuracy.
diff_tda_pca_5.60.5_svm.n4_test <-
  svm_cf_ov_acc - ad_tda_pc_5.60.5_n4_svm_cf0_ov_acc
diff_tda_pca_5.60.5_svm.n4_test
## Accuracy
## 0.06296069
## Bayesian Tests Test set diff
# Bayesian Sign Test on the single test-set difference
bst_tda_pca_5.60.5_svm.n4_test <- BayesianSignTest(
  as.matrix(diff_tda_pca_5.60.5_svm.n4_test),
  -0.01,
  0.01
)
bst_tda_pca_5.60.5_svm.n4_test
## $probLeft
## [1] 0
##
## $probRope
## [1] 0.5
##
## $probRight
## [1] 0.5
# Odds Left Bayesian Sign Test: probLeft / probRight
bst_tda_pca_5.60.5_svm.n4_test_odds.left <- with(
  bst_tda_pca_5.60.5_svm.n4_test,
  probLeft / probRight
)
bst_tda_pca_5.60.5_svm.n4_test_odds.left
## [1] 0
# Bayesian Signed Rank Test on the single test-set difference
bsr_tda_pca_5.60.5_svm.n4_test <- BayesianSignedRank(
  as.matrix(diff_tda_pca_5.60.5_svm.n4_test),
  -0.01,
  0.01
)
bsr_tda_pca_5.60.5_svm.n4_test
## $winLeft
## [1] 0
##
## $winRope
## [1] 0.1584333
##
## $winRight
## [1] 0.8415667
# Bayesian Correlated Test
# NOTE(review): the input is a single test-set difference and the recorded
# result is NA for left/rope/right — presumably no spread can be estimated
# from one value. Confirm whether this call should stay.
bct_tda_pca_5.60.5_svm.n4_test <- correlatedBayesianTtest(
  as.matrix(diff_tda_pca_5.60.5_svm.n4_test),
  0.1,
  -0.01,
  0.01
)
bct_tda_pca_5.60.5_svm.n4_test
## $left
## [1] NA
##
## $rope
## [1] NA
##
## $right
## [1] NA
# Rope Plot
#plot(rope(diff_tda_pca_5.60.5_svm.n4_test)))
#BayesFactor
#bf_tda_pca_5.60.5_svm.n4_test = ttestBF(x = as.matrix(diff_tda_pca_5.60.5_svm.n4_test)) #bf_tda_pca_5.60.5_svm.n4_test
#t_test
#t.test(as.matrix(diff_tda_pca_5.60.5_svm.n4_test))
##Node5
# Tune an RBF-kernel SVM (caret method "svmRadial") on the node-5 data, using
# the shared fitControl resampling setup and svmGrid tuning grid, with
# predictors centered and scaled. Accuracy selects the final model.
# Changes: `T` -> `TRUE` (T can be reassigned) and the partially matched
# `preProc` is spelled out as `preProcess`.
# NOTE(review): `Importance` is not a named train() argument and is forwarded
# through `...`; confirm the underlying fit actually uses it.
# NOTE(review): the recorded warnings report a zero-variance predictor; adding
# "zv" to preProcess would drop it, but that would change the recorded results.
Adult_TDA_PC_5.60.5_n5_SvmFit0 <- train(
  as.factor(adult_df1) ~ .,
  data = tda.m_adult_5.60.5.n5.vec,
  Importance = TRUE,
  method = "svmRadial",
  trControl = fitControl,
  tuneGrid = svmGrid,
  preProcess = c("center", "scale"),
  metric = "Accuracy"
)
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
# Print the fitted caret model for node 5: resampling results over the tuning grid.
Adult_TDA_PC_5.60.5_n5_SvmFit0
## Support Vector Machines with Radial Basis Function Kernel
##
## 16508 samples
## 108 predictor
## 2 classes: ' <=50K', ' >50K'
##
## Pre-processing: centered (108), scaled (108)
## Resampling: Cross-Validated (3 fold)
## Summary of sample sizes: 11005, 11005, 11006
## Resampling results across tuning parameters:
##
## sigma C Accuracy Kappa
## 0.1 0.25 0.9921250 0.0000000000
## 0.1 0.50 0.9921250 0.0000000000
## 0.1 0.75 0.9921250 0.0000000000
## 0.1 1.00 0.9920645 -0.0001184348
## 0.1 1.25 0.9919433 -0.0003501805
## 1.0 0.25 0.9921250 0.0000000000
## 1.0 0.50 0.9921250 0.0000000000
## 1.0 0.75 0.9921250 0.0000000000
## 1.0 1.00 0.9920645 -0.0001184960
## 1.0 1.25 0.9920039 0.0137398679
## 10.0 0.25 0.9921250 0.0000000000
## 10.0 0.50 0.9921250 0.0000000000
## 10.0 0.75 0.9921250 0.0000000000
## 10.0 1.00 0.9921250 0.0000000000
## 10.0 1.25 0.9920645 -0.0001184348
##
## Accuracy was used to select the optimal model using the largest value.
## The final values used for the model were sigma = 10 and C = 0.25.
# Per-fold Accuracy and Kappa for the selected tuning values.
Adult_TDA_PC_5.60.5_n5_SvmFit0$resample
## Accuracy Kappa Resample
## 1 0.9920044 0 Fold1
## 2 0.9921861 0 Fold2
## 3 0.9921847 0 Fold3
# Keep the per-fold accuracies as a one-column data frame. The column is
# selected by name rather than by position so a change in the column order of
# $resample cannot silently swap in Kappa.
ad_tda_pc_5.60.5_n5_svm_fit_re <- Adult_TDA_PC_5.60.5_n5_SvmFit0$resample["Accuracy"]
# summary() on the train object only reports the wrapped ksvm fit.
summary(Adult_TDA_PC_5.60.5_n5_SvmFit0)
## Length Class Mode
## 1 ksvm S4
#vip(Adult_TDA_PC_5.60.5_n5_SvmFit0,25) + ggtitle("Adult_TDA_PCA_5.60.5_n5_SvmFit TDA-Assisted Svm")
# Score the testing data with the node-5 model.
pred0 <- predict(
  Adult_TDA_PC_5.60.5_n5_SvmFit0,
  newdata = adult.one_hot_df4Test
)
# Cross-tabulate the predictions against the observed test labels.
ad_tda_pc_5.60.5_n5_svm_cf0 <- confusionMatrix(
  data = pred0,
  reference = as.factor(adult.one_hot_df4Test$adult_df1)
)
ad_tda_pc_5.60.5_n5_svm_cf0
## Confusion Matrix and Statistics
##
## Reference
## Prediction <=50K >50K
## <=50K 7416 2352
## >50K 0 0
##
## Accuracy : 0.7592
## 95% CI : (0.7506, 0.7677)
## No Information Rate : 0.7592
## P-Value [Acc > NIR] : 0.5055
##
## Kappa : 0
##
## Mcnemar's Test P-Value : <2e-16
##
## Sensitivity : 1.0000
## Specificity : 0.0000
## Pos Pred Value : 0.7592
## Neg Pred Value : NaN
## Prevalence : 0.7592
## Detection Rate : 0.7592
## Detection Prevalence : 1.0000
## Balanced Accuracy : 0.5000
##
## 'Positive' Class : <=50K
##
# Second print of the same confusion matrix (output identical to the first print).
ad_tda_pc_5.60.5_n5_svm_cf0
## Confusion Matrix and Statistics
##
## Reference
## Prediction <=50K >50K
## <=50K 7416 2352
## >50K 0 0
##
## Accuracy : 0.7592
## 95% CI : (0.7506, 0.7677)
## No Information Rate : 0.7592
## P-Value [Acc > NIR] : 0.5055
##
## Kappa : 0
##
## Mcnemar's Test P-Value : <2e-16
##
## Sensitivity : 1.0000
## Specificity : 0.0000
## Pos Pred Value : 0.7592
## Neg Pred Value : NaN
## Prevalence : 0.7592
## Detection Rate : 0.7592
## Detection Prevalence : 1.0000
## Balanced Accuracy : 0.5000
##
## 'Positive' Class : <=50K
##
# Overall statistics as a named numeric vector (Accuracy, Kappa, CI bounds, p-values).
ad_tda_pc_5.60.5_n5_svm_cf0$overall
## Accuracy Kappa AccuracyLower AccuracyUpper AccuracyNull
## 0.7592138 0.0000000 0.7506071 0.7676657 0.7592138
## AccuracyPValue McnemarPValue
## 0.5055358 0.0000000
# Test-set accuracy, selected by name instead of position 1.
ad_tda_pc_5.60.5_n5_svm_cf0_ov_acc <- ad_tda_pc_5.60.5_n5_svm_cf0$overall["Accuracy"]
# Per-class statistics (sensitivity, specificity, precision, recall, F1, ...).
ad_tda_pc_5.60.5_n5_svm_cf0$byClass
## Sensitivity Specificity Pos Pred Value
## 1.0000000 0.0000000 0.7592138
## Neg Pred Value Precision Recall
## NaN 0.7592138 1.0000000
## F1 Prevalence Detection Rate
## 0.8631285 0.7592138 0.7592138
## Detection Prevalence Balanced Accuracy
## 1.0000000 0.5000000
# Precision, recall and F1, selected by name instead of positions 5:7.
ad_tda_pc_5.60.5_n5_svm_cf0_pre_rec_f1 <-
  ad_tda_pc_5.60.5_n5_svm_cf0$byClass[c("Precision", "Recall", "F1")]
###### Conduct initial Bayesian tests of non-tda-assisted SVM vs. tda-assisted SVM classifiers
### 3-fold diff
# Row-wise accuracy difference: ad_svm_fit_re (defined earlier in the file;
# presumably the non-TDA SVM folds) minus the node-5 TDA-assisted SVM folds.
diff_tda_pca_5.60.5_svm_n5_3_fold <-
  ad_svm_fit_re - ad_tda_pc_5.60.5_n5_svm_fit_re
diff_tda_pca_5.60.5_svm_n5_3_fold
## Accuracy
## 1 -0.1677316
## 2 -0.1698644
## 3 -0.1686785
## Bayesian Tests 3-fold diff
# Bayesian Sign Test (the two numeric bounds are presumably the ROPE limits)
bst_tda_pca_5.60.5_svm.n5_3_fold <- BayesianSignTest(
  as.matrix(diff_tda_pca_5.60.5_svm_n5_3_fold),
  -0.01,
  0.01
)
bst_tda_pca_5.60.5_svm.n5_3_fold
## $probLeft
## [1] 0.75
##
## $probRope
## [1] 0.25
##
## $probRight
## [1] 0
# Odds Left Bayesian Sign Test: probLeft / probRight (0 or Inf when one side is 0)
bst_tda_pca_5.60.5_svm.n5_3_fold_odds.left <- with(
  bst_tda_pca_5.60.5_svm.n5_3_fold,
  probLeft / probRight
)
bst_tda_pca_5.60.5_svm.n5_3_fold_odds.left
## [1] Inf
# Bayesian Signed Rank Test on the fold differences, same bounds as the sign test
bsr_tda_pca_5.60.5_svm.n5_3_fold <- BayesianSignedRank(
  as.matrix(diff_tda_pca_5.60.5_svm_n5_3_fold),
  -0.01,
  0.01
)
bsr_tda_pca_5.60.5_svm.n5_3_fold
## $winLeft
## [1] 0.9901333
##
## $winRope
## [1] 0.009866667
##
## $winRight
## [1] 0
# Bayesian Correlated Test
# The second argument is the correlation (rho) between cross-validation
# results, which the correlated Bayesian t-test sets to 1 / (number of folds).
# The models here are resampled with 3-fold CV, so rho is 1/3; the previous
# value 0.1 corresponds to 10-fold CV.
# NOTE(review): assumes the signature correlatedBayesianTtest(diff, rho,
# rope_min, rope_max) — confirm against its definition. The output recorded
# below was produced with rho = 0.1; re-knit to refresh it.
bct_tda_pca_5.60.5_svm.n5_3_fold <- correlatedBayesianTtest(
  as.matrix(diff_tda_pca_5.60.5_svm_n5_3_fold),
  1 / 3,
  -0.01,
  0.01
)
bct_tda_pca_5.60.5_svm.n5_3_fold
## $left
## [1] 0.9999899
##
## $rope
## [1] 2.126858e-06
##
## $right
## [1] 7.941433e-06
# Rope Plot
plot(
  rope(diff_tda_pca_5.60.5_svm_n5_3_fold, range = c(-0.01, 0.01))
)

#BayesFactor
#bf_tda_pca_5.60.5_rf.n5_3_fold = ttestBF(x = as.matrix(diff_tda_pca_5.60.5_svm_n5_3_fold))
#bf_tda_pca_5.60.5_rf.n5_3_fold
#t_test
# One-sample t-test of the fold differences against a mean of zero.
t.test(x = as.matrix(diff_tda_pca_5.60.5_svm_n5_3_fold))
##
## One Sample t-test
##
## data: as.matrix(diff_tda_pca_5.60.5_svm_n5_3_fold)
## t = -273.53, df = 2, p-value = 1.337e-05
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
## -0.1714128 -0.1661036
## sample estimates:
## mean of x
## -0.1687582
### Test set diff
# Single test-set accuracy difference: svm_cf_ov_acc (defined earlier in the
# file) minus the node-5 TDA-assisted SVM test accuracy.
diff_tda_pca_5.60.5_svm.n5_test <-
  svm_cf_ov_acc - ad_tda_pc_5.60.5_n5_svm_cf0_ov_acc
diff_tda_pca_5.60.5_svm.n5_test
## Accuracy
## 0.06777232
## Bayesian Tests Test set diff
# Bayesian Sign Test on the single test-set difference
bst_tda_pca_5.60.5_svm.n5_test <- BayesianSignTest(
  as.matrix(diff_tda_pca_5.60.5_svm.n5_test),
  -0.01,
  0.01
)
bst_tda_pca_5.60.5_svm.n5_test
## $probLeft
## [1] 0
##
## $probRope
## [1] 0.5
##
## $probRight
## [1] 0.5
# Odds Left Bayesian Sign Test: probLeft / probRight
bst_tda_pca_5.60.5_svm.n5_test_odds.left <- with(
  bst_tda_pca_5.60.5_svm.n5_test,
  probLeft / probRight
)
bst_tda_pca_5.60.5_svm.n5_test_odds.left
## [1] 0
# Bayesian Signed Rank Test on the single test-set difference
bsr_tda_pca_5.60.5_svm.n5_test <- BayesianSignedRank(
  as.matrix(diff_tda_pca_5.60.5_svm.n5_test),
  -0.01,
  0.01
)
bsr_tda_pca_5.60.5_svm.n5_test
## $winLeft
## [1] 0
##
## $winRope
## [1] 0.1589333
##
## $winRight
## [1] 0.8410667
# Bayesian Correlated Test
# NOTE(review): the input is a single test-set difference and the recorded
# result is NA for left/rope/right — presumably no spread can be estimated
# from one value. Confirm whether this call should stay.
bct_tda_pca_5.60.5_svm.n5_test <- correlatedBayesianTtest(
  as.matrix(diff_tda_pca_5.60.5_svm.n5_test),
  0.1,
  -0.01,
  0.01
)
bct_tda_pca_5.60.5_svm.n5_test
## $left
## [1] NA
##
## $rope
## [1] NA
##
## $right
## [1] NA
# Rope Plot
#plot(rope(diff_tda_pca_5.60.5_svm.n5_test)))
#BayesFactor
#bf_tda_pca_5.60.5_svm.n5_test = ttestBF(x = as.matrix(diff_tda_pca_5.60.5_svm.n5_test)) #bf_tda_pca_5.60.5_svm.n5_test
#t_test
#t.test(as.matrix(diff_tda_pca_5.60.5_svm.n5_test))
##With TDA KDE filter 5 intervals, 50% overlap, 5 bins
##Node1
# Tune a radial-basis SVM with caret on the node-1 data set produced by the KDE
# filter. `fitControl` (resampling) and `svmGrid` (sigma / C grid) are defined
# outside this section; predictors are centred and scaled and the winning
# tuning row is chosen by accuracy.
# NOTE(review): the section header says "50% overlap" while the object names
# carry "5.60.5" — confirm which overlap was actually used.
# NOTE(review): `Importance` is not a documented argument of `train()`; it is
# presumably forwarded through `...` and ignored for svmRadial — confirm.
# NOTE(review): the zero-variance warnings emitted by this call come from dummy
# columns that are constant within a resample; adding "zv" to `preProcess`
# would drop them, but that changes the fitted model — confirm before doing so.
Adult_TDA_KDE_5.60.5_n1_SvmFit0 <- train(
  as.factor(adult_df1) ~ .,
  data = tda.m_kde_adult_5.60.5.n1.vec,
  Importance = TRUE,
  method = "svmRadial",
  trControl = fitControl,
  tuneGrid = svmGrid,
  preProcess = c("center", "scale"),
  metric = "Accuracy"
)
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Outlying.US.Guam.USVI.etc.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Outlying.US.Guam.USVI.etc.
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Outlying.US.Guam.USVI.etc.
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Outlying.US.Guam.USVI.etc.
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Outlying.US.Guam.USVI.etc.
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Outlying.US.Guam.USVI.etc.
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Outlying.US.Guam.USVI.etc.
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Outlying.US.Guam.USVI.etc.
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Outlying.US.Guam.USVI.etc.
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Outlying.US.Guam.USVI.etc.
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Outlying.US.Guam.USVI.etc.
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Outlying.US.Guam.USVI.etc.
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Outlying.US.Guam.USVI.etc.
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Outlying.US.Guam.USVI.etc.
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Outlying.US.Guam.USVI.etc.
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V7.Armed.Forces
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V7.Armed.Forces
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V7.Armed.Forces
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V7.Armed.Forces
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V7.Armed.Forces
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V7.Armed.Forces
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V7.Armed.Forces
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V7.Armed.Forces
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V7.Armed.Forces
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V7.Armed.Forces
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V7.Armed.Forces
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V7.Armed.Forces
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V7.Armed.Forces
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V7.Armed.Forces
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V7.Armed.Forces
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
# Print the caret tuning summary (accuracy / kappa per sigma-C pair) for the node-1 KDE SVM
Adult_TDA_KDE_5.60.5_n1_SvmFit0
## Support Vector Machines with Radial Basis Function Kernel
##
## 15260 samples
## 108 predictor
## 2 classes: ' <=50K', ' >50K'
##
## Pre-processing: centered (108), scaled (108)
## Resampling: Cross-Validated (3 fold)
## Summary of sample sizes: 10173, 10173, 10174
## Resampling results across tuning parameters:
##
## sigma C Accuracy Kappa
## 0.1 0.25 0.7984271 0.354881307
## 0.1 0.50 0.8079291 0.410298159
## 0.1 0.75 0.8141544 0.444011743
## 0.1 1.00 0.8157927 0.458883476
## 0.1 1.25 0.8158585 0.465014365
## 1.0 0.25 0.7637615 0.146797472
## 1.0 0.50 0.7745738 0.216335733
## 1.0 0.75 0.7809958 0.263017354
## 1.0 1.00 0.7844034 0.294498887
## 1.0 1.25 0.7868936 0.317219627
## 10.0 0.25 0.7401704 0.000745815
## 10.0 0.50 0.7427261 0.018764032
## 10.0 0.75 0.7460681 0.045819372
## 10.0 1.00 0.7506553 0.080662433
## 10.0 1.25 0.7524246 0.102486413
##
## Accuracy was used to select the optimal model using the largest value.
## The final values used for the model were sigma = 0.1 and C = 1.25.
# Show the per-fold accuracy and kappa of the selected model (note the printed fold order)
Adult_TDA_KDE_5.60.5_n1_SvmFit0$resample
## Accuracy Kappa Resample
## 1 0.8181639 0.4777044 Fold1
## 2 0.8161620 0.4615179 Fold3
## 3 0.8132495 0.4558208 Fold2
# Keep the per-fold accuracy of the node-1 KDE SVM as a one-column data frame;
# it is subtracted from the comparison model's fold accuracies further down.
ad_tda_kde_5.60.5_n1_svm_fit_re <- Adult_TDA_KDE_5.60.5_n1_SvmFit0$resample["Accuracy"]
# Summarise the KDE node-1 fit. The previous call summarised
# Adult_TDA_PC_5.60.5_n1_SvmFit0 (the PCA-filter model), a copy-paste slip.
summary(Adult_TDA_KDE_5.60.5_n1_SvmFit0)
## Length Class Mode
## 1 ksvm S4
# Variable-importance plot (disabled)
#vip(Adult_TDA_KDE_5.60.5_n1_SvmFit0,25) + ggtitle("Adult_TDA_KDE_5.60.5_n1_SvmFit TDA-Assited Svm")
# Score the held-out test rows with the node-1 KDE SVM
pred0 <- predict(
  Adult_TDA_KDE_5.60.5_n1_SvmFit0,
  newdata = adult.one_hot_df4Test
)
# Cross-tabulate the predictions against the true test labels and print the result
(ad_tda_kde_5.60.5_n1_svm_cf0 <- confusionMatrix(
  data = pred0,
  reference = as.factor(adult.one_hot_df4Test$adult_df1)
))
## Confusion Matrix and Statistics
##
## Reference
## Prediction <=50K >50K
## <=50K 6989 937
## >50K 427 1415
##
## Accuracy : 0.8604
## 95% CI : (0.8533, 0.8672)
## No Information Rate : 0.7592
## P-Value [Acc > NIR] : < 2.2e-16
##
## Kappa : 0.5875
##
## Mcnemar's Test P-Value : < 2.2e-16
##
## Sensitivity : 0.9424
## Specificity : 0.6016
## Pos Pred Value : 0.8818
## Neg Pred Value : 0.7682
## Prevalence : 0.7592
## Detection Rate : 0.7155
## Detection Prevalence : 0.8114
## Balanced Accuracy : 0.7720
##
## 'Positive' Class : <=50K
##
# Print the confusion matrix again (same object as just above, so the output repeats)
ad_tda_kde_5.60.5_n1_svm_cf0
## Confusion Matrix and Statistics
##
## Reference
## Prediction <=50K >50K
## <=50K 6989 937
## >50K 427 1415
##
## Accuracy : 0.8604
## 95% CI : (0.8533, 0.8672)
## No Information Rate : 0.7592
## P-Value [Acc > NIR] : < 2.2e-16
##
## Kappa : 0.5875
##
## Mcnemar's Test P-Value : < 2.2e-16
##
## Sensitivity : 0.9424
## Specificity : 0.6016
## Pos Pred Value : 0.8818
## Neg Pred Value : 0.7682
## Prevalence : 0.7592
## Detection Rate : 0.7155
## Detection Prevalence : 0.8114
## Balanced Accuracy : 0.7720
##
## 'Positive' Class : <=50K
##
# Show the overall statistics vector (accuracy, kappa, accuracy bounds, p-values)
ad_tda_kde_5.60.5_n1_svm_cf0$overall
## Accuracy Kappa AccuracyLower AccuracyUpper AccuracyNull
## 8.603604e-01 5.875345e-01 8.533280e-01 8.671771e-01 7.592138e-01
## AccuracyPValue McnemarPValue
## 4.532756e-136 3.273206e-43
# Keep the overall test-set accuracy (first entry of $overall) for the later comparison
ad_tda_kde_5.60.5_n1_svm_cf0_ov_acc <-
  ad_tda_kde_5.60.5_n1_svm_cf0$overall["Accuracy"]
# Show the per-class metrics (sensitivity, specificity, precision, recall, F1, ...)
ad_tda_kde_5.60.5_n1_svm_cf0$byClass
## Sensitivity Specificity Pos Pred Value
## 0.9424218 0.6016156 0.8817815
## Neg Pred Value Precision Recall
## 0.7681868 0.8817815 0.9424218
## F1 Prevalence Detection Rate
## 0.9110937 0.7592138 0.7154996
## Detection Prevalence Balanced Accuracy
## 0.8114251 0.7720187
# Store precision, recall and F1 (entries 5 to 7 of $byClass)
ad_tda_kde_5.60.5_n1_svm_cf0_pre_rec_f1 <-
  ad_tda_kde_5.60.5_n1_svm_cf0$byClass[c("Precision", "Recall", "F1")]
###### Conduct initial Bayesian tests of non-tda-assisted SVM vs. tda-assisted SVM classifiers
### 3-fold difference: fold accuracies in `ad_svm_fit_re` minus those of the node-1 KDE SVM
# NOTE(review): the two data frames are subtracted row by row; the KDE resamples
# print in the order Fold1, Fold3, Fold2 — confirm `ad_svm_fit_re` uses the same order.
(diff_tda_kde_5.60.5_svm_n1_3_fold <-
  ad_svm_fit_re - ad_tda_kde_5.60.5_n1_svm_fit_re)
## Accuracy
## 1 0.006108792
## 2 0.006159650
## 3 0.010256726
## Bayesian tests on the 3-fold differences
# Bayesian sign test; -0.01 and 0.01 are presumably the ROPE bounds (same interval as the ROPE plot)
(bst_tda_kde_5.60.5_svm.n1_3_fold <- BayesianSignTest(
  as.matrix(diff_tda_kde_5.60.5_svm_n1_3_fold),
  -0.01,
  0.01
))
## $probLeft
## [1] 0
##
## $probRope
## [1] 0.75
##
## $probRight
## [1] 0.25
# Odds of "left" versus "right" from the sign test: probLeft / probRight
(bst_tda_kde_5.60.5_svm.n1_3_fold_odds.left <-
  bst_tda_kde_5.60.5_svm.n1_3_fold[["probLeft"]] /
    bst_tda_kde_5.60.5_svm.n1_3_fold[["probRight"]])
## [1] 0
# Bayesian signed-rank test on the 3-fold differences (same -0.01 / 0.01 bounds)
(bsr_tda_kde_5.60.5_svm.n1_3_fold <- BayesianSignedRank(
  as.matrix(diff_tda_kde_5.60.5_svm_n1_3_fold),
  -0.01,
  0.01
))
## $winLeft
## [1] 0
##
## $winRope
## [1] 0.9527667
##
## $winRight
## [1] 0.04723333
# Bayesian correlated t-test on the 3-fold differences
# (second argument 0.1 is presumably the assumed correlation — confirm against the function definition)
(bct_tda_kde_5.60.5_svm.n1_3_fold <- correlatedBayesianTtest(
  as.matrix(diff_tda_kde_5.60.5_svm_n1_3_fold),
  0.1,
  -0.01,
  0.01
))
## $left
## [1] 0.004057276
##
## $rope
## [1] 0.8674647
##
## $right
## [1] 0.128478
# ROPE plot of the 3-fold accuracy differences, using the interval [-0.01, 0.01]
diff_tda_kde_5.60.5_svm_n1_3_fold |>
  rope(range = c(-0.01, 0.01)) |>
  plot()

# Bayes factor t-test (disabled)
# bf_tda_kde_5.60.5_svm.n1_3_fold = ttestBF(x = as.matrix(diff_tda_kde_5.60.5_svm_n1_3_fold))
# bf_tda_kde_5.60.5_svm.n1_3_fold
# Frequentist one-sample t-test of the 3-fold accuracy differences (null: mean = 0)
diff_tda_kde_5.60.5_svm_n1_3_fold |>
  as.matrix() |>
  t.test()
##
## One Sample t-test
##
## data: as.matrix(diff_tda_kde_5.60.5_svm_n1_3_fold)
## t = 5.4636, df = 2, p-value = 0.0319
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
## 0.001595482 0.013421297
## sample estimates:
## mean of x
## 0.00750839
### Test-set difference: `svm_cf_ov_acc` minus the node-1 KDE SVM test accuracy
# (`svm_cf_ov_acc` is presumably the non-TDA SVM accuracy — defined outside this section)
(diff_tda_kde_5.60.5_svm.n1_test <-
  svm_cf_ov_acc - ad_tda_kde_5.60.5_n1_svm_cf0_ov_acc)
## Accuracy
## -0.03337428
## Bayesian tests on the test-set difference
# Bayesian sign test; -0.01 and 0.01 are presumably the ROPE bounds (same interval as the ROPE plots)
(bst_tda_kde_5.60.5_svm.n1_test <- BayesianSignTest(
  as.matrix(diff_tda_kde_5.60.5_svm.n1_test),
  -0.01,
  0.01
))
## $probLeft
## [1] 0.5
##
## $probRope
## [1] 0.5
##
## $probRight
## [1] 0
# Odds of "left" versus "right" from the sign test: probLeft / probRight
# (probRight is 0 in the printed sign-test result, so the ratio prints as Inf)
(bst_tda_kde_5.60.5_svm.n1_test_odds.left <-
  bst_tda_kde_5.60.5_svm.n1_test[["probLeft"]] /
    bst_tda_kde_5.60.5_svm.n1_test[["probRight"]])
## [1] Inf
# Bayesian signed-rank test on the test-set difference (same -0.01 / 0.01 bounds)
(bsr_tda_kde_5.60.5_svm.n1_test <- BayesianSignedRank(
  as.matrix(diff_tda_kde_5.60.5_svm.n1_test),
  -0.01,
  0.01
))
## $winLeft
## [1] 0.8396
##
## $winRope
## [1] 0.1604
##
## $winRight
## [1] 0
# Bayesian correlated t-test on the test-set difference
# (second argument 0.1 is presumably the assumed correlation — confirm against the function definition)
# NOTE(review): the input is a single difference and the printed left/rope/right are all NA.
(bct_tda_kde_5.60.5_svm.n1_test <- correlatedBayesianTtest(
  as.matrix(diff_tda_kde_5.60.5_svm.n1_test),
  0.1,
  -0.01,
  0.01
))
## $left
## [1] NA
##
## $rope
## [1] NA
##
## $right
## [1] NA
# Rope Plot
#plot(rope(diff_tda_kde_5.60.5_svm.n1_test))
#BayesFactor
#bf_tda_kde_5.60.5_svm.n1_test = ttestBF(x = as.matrix(diff_tda_kde_5.60.5_svm.n1_test)) #bf_tda_kde_5.60.5_svm.n1_test
#t_test
#t.test(as.matrix(diff_tda_kde_5.60.5_svm.n1_test))
##With TDA KDE filter 5 intervals, 50% overlap, 5 bins
##Node2
# Tune a radial-basis SVM with caret on the node-2 data set produced by the KDE
# filter. `fitControl` (resampling) and `svmGrid` (sigma / C grid) are defined
# outside this section; predictors are centred and scaled and the winning
# tuning row is chosen by accuracy.
# NOTE(review): `Importance` is not a documented argument of `train()`; it is
# presumably forwarded through `...` and ignored for svmRadial — confirm.
# NOTE(review): the zero-variance warnings emitted by this call come from dummy
# columns that are constant within a resample; adding "zv" to `preProcess`
# would drop them, but that changes the fitted model — confirm before doing so.
Adult_TDA_KDE_5.60.5_n2_SvmFit0 <- train(
  as.factor(adult_df1) ~ .,
  data = tda.m_kde_adult_5.60.5.n2.vec,
  Importance = TRUE,
  method = "svmRadial",
  trControl = fitControl,
  tuneGrid = svmGrid,
  preProcess = c("center", "scale"),
  metric = "Accuracy"
)
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.1st.4th, V4.5th.6th, V4.7th.8th,
## V4.9th, V4.Doctorate, V4.Preschool
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.1st.4th, V4.5th.6th, V4.7th.8th,
## V4.9th, V4.Doctorate, V4.Preschool
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.1st.4th, V4.5th.6th, V4.7th.8th,
## V4.9th, V4.Doctorate, V4.Preschool
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.1st.4th, V4.5th.6th, V4.7th.8th,
## V4.9th, V4.Doctorate, V4.Preschool
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.1st.4th, V4.5th.6th, V4.7th.8th,
## V4.9th, V4.Doctorate, V4.Preschool
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.1st.4th, V4.5th.6th, V4.7th.8th,
## V4.9th, V4.Doctorate, V4.Preschool
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.1st.4th, V4.5th.6th, V4.7th.8th,
## V4.9th, V4.Doctorate, V4.Preschool
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.1st.4th, V4.5th.6th, V4.7th.8th,
## V4.9th, V4.Doctorate, V4.Preschool
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.1st.4th, V4.5th.6th, V4.7th.8th,
## V4.9th, V4.Doctorate, V4.Preschool
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.1st.4th, V4.5th.6th, V4.7th.8th,
## V4.9th, V4.Doctorate, V4.Preschool
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.1st.4th, V4.5th.6th, V4.7th.8th,
## V4.9th, V4.Doctorate, V4.Preschool
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.1st.4th, V4.5th.6th, V4.7th.8th,
## V4.9th, V4.Doctorate, V4.Preschool
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.1st.4th, V4.5th.6th, V4.7th.8th,
## V4.9th, V4.Doctorate, V4.Preschool
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.1st.4th, V4.5th.6th, V4.7th.8th,
## V4.9th, V4.Doctorate, V4.Preschool
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.1st.4th, V4.5th.6th, V4.7th.8th,
## V4.9th, V4.Doctorate, V4.Preschool
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.1st.4th, V4.5th.6th, V4.7th.8th,
## V4.9th, V4.Doctorate, V4.Preschool, V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.1st.4th, V4.5th.6th, V4.7th.8th,
## V4.9th, V4.Doctorate, V4.Preschool, V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.1st.4th, V4.5th.6th, V4.7th.8th,
## V4.9th, V4.Doctorate, V4.Preschool, V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.1st.4th, V4.5th.6th, V4.7th.8th,
## V4.9th, V4.Doctorate, V4.Preschool, V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.1st.4th, V4.5th.6th, V4.7th.8th,
## V4.9th, V4.Doctorate, V4.Preschool, V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.1st.4th, V4.5th.6th, V4.7th.8th,
## V4.9th, V4.Doctorate, V4.Preschool, V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.1st.4th, V4.5th.6th, V4.7th.8th,
## V4.9th, V4.Doctorate, V4.Preschool, V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.1st.4th, V4.5th.6th, V4.7th.8th,
## V4.9th, V4.Doctorate, V4.Preschool, V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.1st.4th, V4.5th.6th, V4.7th.8th,
## V4.9th, V4.Doctorate, V4.Preschool, V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.1st.4th, V4.5th.6th, V4.7th.8th,
## V4.9th, V4.Doctorate, V4.Preschool, V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.1st.4th, V4.5th.6th, V4.7th.8th,
## V4.9th, V4.Doctorate, V4.Preschool, V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.1st.4th, V4.5th.6th, V4.7th.8th,
## V4.9th, V4.Doctorate, V4.Preschool, V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.1st.4th, V4.5th.6th, V4.7th.8th,
## V4.9th, V4.Doctorate, V4.Preschool, V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.1st.4th, V4.5th.6th, V4.7th.8th,
## V4.9th, V4.Doctorate, V4.Preschool, V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.1st.4th, V4.5th.6th, V4.7th.8th,
## V4.9th, V4.Doctorate, V4.Preschool, V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.1st.4th, V4.5th.6th, V4.7th.8th,
## V4.9th, V4.Doctorate, V4.Preschool, V7.Armed.Forces
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.1st.4th, V4.5th.6th, V4.7th.8th,
## V4.9th, V4.Doctorate, V4.Preschool, V7.Armed.Forces
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.1st.4th, V4.5th.6th, V4.7th.8th,
## V4.9th, V4.Doctorate, V4.Preschool, V7.Armed.Forces
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.1st.4th, V4.5th.6th, V4.7th.8th,
## V4.9th, V4.Doctorate, V4.Preschool, V7.Armed.Forces
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.1st.4th, V4.5th.6th, V4.7th.8th,
## V4.9th, V4.Doctorate, V4.Preschool, V7.Armed.Forces
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.1st.4th, V4.5th.6th, V4.7th.8th,
## V4.9th, V4.Doctorate, V4.Preschool, V7.Armed.Forces
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.1st.4th, V4.5th.6th, V4.7th.8th,
## V4.9th, V4.Doctorate, V4.Preschool, V7.Armed.Forces
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.1st.4th, V4.5th.6th, V4.7th.8th,
## V4.9th, V4.Doctorate, V4.Preschool, V7.Armed.Forces
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.1st.4th, V4.5th.6th, V4.7th.8th,
## V4.9th, V4.Doctorate, V4.Preschool, V7.Armed.Forces
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.1st.4th, V4.5th.6th, V4.7th.8th,
## V4.9th, V4.Doctorate, V4.Preschool, V7.Armed.Forces
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.1st.4th, V4.5th.6th, V4.7th.8th,
## V4.9th, V4.Doctorate, V4.Preschool, V7.Armed.Forces
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.1st.4th, V4.5th.6th, V4.7th.8th,
## V4.9th, V4.Doctorate, V4.Preschool, V7.Armed.Forces
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.1st.4th, V4.5th.6th, V4.7th.8th,
## V4.9th, V4.Doctorate, V4.Preschool, V7.Armed.Forces
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.1st.4th, V4.5th.6th, V4.7th.8th,
## V4.9th, V4.Doctorate, V4.Preschool, V7.Armed.Forces
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.1st.4th, V4.5th.6th, V4.7th.8th,
## V4.9th, V4.Doctorate, V4.Preschool, V7.Armed.Forces
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.1st.4th, V4.5th.6th, V4.7th.8th,
## V4.9th, V4.Doctorate, V4.Preschool
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
# Print the caret tuning summary (accuracy / kappa per sigma-C pair) for the node-2 KDE SVM
Adult_TDA_KDE_5.60.5_n2_SvmFit0
## Support Vector Machines with Radial Basis Function Kernel
##
## 14482 samples
## 108 predictor
## 2 classes: ' <=50K', ' >50K'
##
## Pre-processing: centered (108), scaled (108)
## Resampling: Cross-Validated (3 fold)
## Summary of sample sizes: 9655, 9655, 9654
## Resampling results across tuning parameters:
##
## sigma C Accuracy Kappa
## 0.1 0.25 0.7926395 0.401702058
## 0.1 0.50 0.8007185 0.444429113
## 0.1 0.75 0.8038256 0.462387444
## 0.1 1.00 0.8054830 0.475690686
## 0.1 1.25 0.8057592 0.480563318
## 1.0 0.25 0.7466509 0.164469866
## 1.0 0.50 0.7651567 0.261455005
## 1.0 0.75 0.7701283 0.300833599
## 1.0 1.00 0.7739262 0.329642193
## 1.0 1.25 0.7762052 0.347570793
## 10.0 0.25 0.7185472 0.001407236
## 10.0 0.50 0.7235188 0.029221499
## 10.0 0.75 0.7283524 0.064216885
## 10.0 1.00 0.7328408 0.098316679
## 10.0 1.25 0.7347052 0.118500663
##
## Accuracy was used to select the optimal model using the largest value.
## The final values used for the model were sigma = 0.1 and C = 1.25.
# Show the per-fold accuracy and kappa of the selected model (note the printed fold order)
Adult_TDA_KDE_5.60.5_n2_SvmFit0$resample
## Accuracy Kappa Resample
## 1 0.8062979 0.4826868 Fold1
## 2 0.8003314 0.4662903 Fold3
## 3 0.8106484 0.4927129 Fold2
# Keep the per-fold accuracy of the node-2 KDE SVM as a one-column data frame
ad_tda_kde_5.60.5_n2_svm_fit_re <-
  Adult_TDA_KDE_5.60.5_n2_SvmFit0$resample["Accuracy"]
# Summarise the fitted node-2 model object
Adult_TDA_KDE_5.60.5_n2_SvmFit0 |>
  summary()
## Length Class Mode
## 1 ksvm S4
#vip(Adult_TDA_KDE_5.60.5_n2_SvmFit0,25) + ggtitle("Adult_TDA_KDE_5.60.5_n2_SvmFit TDA-Assisted SVM")
# Score the test data with the node-2 SVM model.
pred0 <- predict(
  Adult_TDA_KDE_5.60.5_n2_SvmFit0,
  newdata = adult.one_hot_df4Test
)
# Cross-tabulate the predictions against the observed labels (coerced to a
# factor) to assess performance on the test data.
ad_tda_kde_5.60.5_n2_svm_cf0 <- confusionMatrix(
  data = pred0,
  reference = as.factor(adult.one_hot_df4Test$adult_df1)
)
ad_tda_kde_5.60.5_n2_svm_cf0
## Confusion Matrix and Statistics
##
## Reference
## Prediction <=50K >50K
## <=50K 7030 926
## >50K 386 1426
##
## Accuracy : 0.8657
## 95% CI : (0.8588, 0.8724)
## No Information Rate : 0.7592
## P-Value [Acc > NIR] : < 2.2e-16
##
## Kappa : 0.6014
##
## Mcnemar's Test P-Value : < 2.2e-16
##
## Sensitivity : 0.9480
## Specificity : 0.6063
## Pos Pred Value : 0.8836
## Neg Pred Value : 0.7870
## Prevalence : 0.7592
## Detection Rate : 0.7197
## Detection Prevalence : 0.8145
## Balanced Accuracy : 0.7771
##
## 'Positive' Class : <=50K
##
ad_tda_kde_5.60.5_n2_svm_cf0
## Confusion Matrix and Statistics
##
## Reference
## Prediction <=50K >50K
## <=50K 7030 926
## >50K 386 1426
##
## Accuracy : 0.8657
## 95% CI : (0.8588, 0.8724)
## No Information Rate : 0.7592
## P-Value [Acc > NIR] : < 2.2e-16
##
## Kappa : 0.6014
##
## Mcnemar's Test P-Value : < 2.2e-16
##
## Sensitivity : 0.9480
## Specificity : 0.6063
## Pos Pred Value : 0.8836
## Neg Pred Value : 0.7870
## Prevalence : 0.7592
## Detection Rate : 0.7197
## Detection Prevalence : 0.8145
## Balanced Accuracy : 0.7771
##
## 'Positive' Class : <=50K
##
ad_tda_kde_5.60.5_n2_svm_cf0$overall
## Accuracy Kappa AccuracyLower AccuracyUpper AccuracyNull
## 8.656839e-01 6.013843e-01 8.587614e-01 8.723875e-01 7.592138e-01
## AccuracyPValue McnemarPValue
## 1.021665e-151 4.402141e-50
# Overall test-set accuracy, kept as a named length-one numeric vector.
ad_tda_kde_5.60.5_n2_svm_cf0_ov_acc <-
  ad_tda_kde_5.60.5_n2_svm_cf0$overall["Accuracy"]
ad_tda_kde_5.60.5_n2_svm_cf0$byClass
## Sensitivity Specificity Pos Pred Value
## 0.9479504 0.6062925 0.8836099
## Neg Pred Value Precision Recall
## 0.7869757 0.8836099 0.9479504
## F1 Prevalence Detection Rate
## 0.9146500 0.7592138 0.7196970
## Detection Prevalence Balanced Accuracy
## 0.8144963 0.7771214
# Precision, recall and F1 (elements 5 to 7 of byClass), selected by name.
ad_tda_kde_5.60.5_n2_svm_cf0_pre_rec_f1 <-
  ad_tda_kde_5.60.5_n2_svm_cf0$byClass[c("Precision", "Recall", "F1")]
###### Conduct initial Bayesian tests of non-tda-assisted SVM vs. tda-assisted SVM classifiers
### 3-fold diff
# Per-fold accuracy difference: ad_svm_fit_re minus the node-2 TDA-assisted SVM
# (ad_svm_fit_re is presumably the non-TDA SVM's fold accuracies; verify where
# it is defined). Positive values mean ad_svm_fit_re scored higher.
# NOTE(review): the subtraction pairs rows by position, and the node-2 resample
# table is printed in the order Fold1, Fold3, Fold2. Confirm this pairing is
# intended.
diff_tda_kde_5.60.5_svm_n2_3_fold<-(ad_svm_fit_re - ad_tda_kde_5.60.5_n2_svm_fit_re)
diff_tda_kde_5.60.5_svm_n2_3_fold
## Accuracy
## 1 0.01797483
## 2 0.02199026
## 3 0.01285775
## Bayesian Tests 3-fold diff
# Bayesian sign test on the fold-wise differences; -0.01 and 0.01 are
# presumably the ROPE bounds (the result has a probRope element).
bst_tda_kde_5.60.5_svm.n2_3_fold <- BayesianSignTest(
  as.matrix(diff_tda_kde_5.60.5_svm_n2_3_fold),
  -0.01,
  0.01
)
bst_tda_kde_5.60.5_svm.n2_3_fold
## $probLeft
## [1] 0
##
## $probRope
## [1] 0.25
##
## $probRight
## [1] 0.75
# Odds of "left" versus "right" from the Bayesian sign test result.
bst_tda_kde_5.60.5_svm.n2_3_fold_odds.left <- with(
  bst_tda_kde_5.60.5_svm.n2_3_fold,
  probLeft / probRight
)
bst_tda_kde_5.60.5_svm.n2_3_fold_odds.left
## [1] 0
# Bayesian signed-rank test on the fold-wise differences, called with the
# same -0.01 / 0.01 bounds as the sign test.
bsr_tda_kde_5.60.5_svm.n2_3_fold <- BayesianSignedRank(
  as.matrix(diff_tda_kde_5.60.5_svm_n2_3_fold),
  -0.01,
  0.01
)
bsr_tda_kde_5.60.5_svm.n2_3_fold
## $winLeft
## [1] 0
##
## $winRope
## [1] 0.0901
##
## $winRight
## [1] 0.9099
# Bayesian Correlated Test
# Applied to the fold-wise accuracy differences with bounds -0.01 / 0.01.
# NOTE(review): the second argument (0.1) is presumably the correlation rho of
# the correlated t-test. The usual heuristic for k-fold CV is rho = 1/k, which
# would be 1/3 for the 3-fold resampling used here. TODO confirm.
bct_tda_kde_5.60.5_svm.n2_3_fold<-correlatedBayesianTtest(as.matrix(diff_tda_kde_5.60.5_svm_n2_3_fold),0.1,-0.01,0.01)
bct_tda_kde_5.60.5_svm.n2_3_fold
## $left
## [1] 0.005999024
##
## $rope
## [1] 0.05910161
##
## $right
## [1] 0.9348994
# ROPE plot of the fold-wise differences against the interval [-0.01, 0.01].
plot(
  rope(
    diff_tda_kde_5.60.5_svm_n2_3_fold,
    range = c(-0.01, 0.01)
  )
)

#BayesFactor
#bf_tda_kde_5.60.5_svm.n2_3_fold = ttestBF(x = as.matrix(diff_tda_kde_5.60.5_svm_n2_3_fold))
#bf_tda_kde_5.60.5_svm.n2_3_fold
# One-sample t-test of the fold-wise accuracy differences (default settings:
# two-sided, null mean of 0). Only three differences are available, so df = 2.
t.test(as.matrix(diff_tda_kde_5.60.5_svm_n2_3_fold))
##
## One Sample t-test
##
## data: as.matrix(diff_tda_kde_5.60.5_svm_n2_3_fold)
## t = 6.6627, df = 2, p-value = 0.02179
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
## 0.006236928 0.028978302
## sample estimates:
## mean of x
## 0.01760762
### Test-set accuracy difference: svm_cf_ov_acc minus the node-2 TDA-assisted SVM
diff_tda_kde_5.60.5_svm.n2_test <-
  svm_cf_ov_acc - ad_tda_kde_5.60.5_n2_svm_cf0_ov_acc
diff_tda_kde_5.60.5_svm.n2_test
## Accuracy
## -0.03869779
## Bayesian Tests Test set diff
# Bayesian Sign Test
# NOTE(review): the input is a single test-set accuracy difference, so the
# test runs on one observation. Interpret the probabilities with caution.
bst_tda_kde_5.60.5_svm.n2_test<-BayesianSignTest(as.matrix(diff_tda_kde_5.60.5_svm.n2_test),-0.01,0.01)
bst_tda_kde_5.60.5_svm.n2_test
## $probLeft
## [1] 0.5
##
## $probRope
## [1] 0.5
##
## $probRight
## [1] 0
# Odds Left Bayesian Sign Test
# Ratio probLeft / probRight. probRight is 0 in the result printed for this
# test, so the ratio evaluates to Inf (division by zero).
bst_tda_kde_5.60.5_svm.n2_test_odds.left<-bst_tda_kde_5.60.5_svm.n2_test$probLeft/bst_tda_kde_5.60.5_svm.n2_test$probRight
bst_tda_kde_5.60.5_svm.n2_test_odds.left
## [1] Inf
# Bayesian signed-rank test on the single test-set accuracy difference,
# called with the same -0.01 / 0.01 bounds as the other tests.
bsr_tda_kde_5.60.5_svm.n2_test <- BayesianSignedRank(
  as.matrix(diff_tda_kde_5.60.5_svm.n2_test),
  -0.01,
  0.01
)
bsr_tda_kde_5.60.5_svm.n2_test
## $winLeft
## [1] 0.8402667
##
## $winRope
## [1] 0.1597333
##
## $winRight
## [1] 0
# Bayesian Correlated Test
# NOTE(review): the input is a single test-set difference and every element of
# the printed result is NA. Presumably no spread can be estimated from one
# value; confirm whether this call should be kept.
bct_tda_kde_5.60.5_svm.n2_test<-correlatedBayesianTtest(as.matrix(diff_tda_kde_5.60.5_svm.n2_test),0.1,-0.01,0.01)
bct_tda_kde_5.60.5_svm.n2_test
## $left
## [1] NA
##
## $rope
## [1] NA
##
## $right
## [1] NA
# Rope Plot
#plot(rope(diff_tda_kde_5.60.5_svm.n2_test))
#BayesFactor
#bf_tda_kde_5.60.5_svm.n2_test = ttestBF(x = as.matrix(diff_tda_kde_5.60.5_svm.n2_test)) #bf_tda_kde_5.60.5_svm.n2_test
#t_test
#t.test(as.matrix(diff_tda_kde_5.60.5_svm.n2_test))
##Node3
# Fit a radial-kernel SVM (caret method "svmRadial") on the node-3 data,
# centering and scaling the predictors and selecting the tuning parameters by
# Accuracy. The resampling scheme and tuning grid come from fitControl and
# svmGrid, which are defined elsewhere in the script.
# `TRUE` replaces `T` because `T` is an ordinary variable that can be
# reassigned, and `preProcess` is spelled out instead of relying on partial
# argument matching.
# NOTE(review): `Importance` is not a documented train() argument; it is
# forwarded through `...` to the underlying fitter. Confirm that it has any
# effect for svmRadial.
# NOTE(review): this fit emits repeated zero-variance warnings. Adding "zv" to
# preProcess would drop the constant predictors, but would also change the
# fitted model, so it is left as is.
Adult_TDA_KDE_5.60.5_n3_SvmFit0 <- train(
  as.factor(adult_df1) ~ .,
  data = tda.m_kde_adult_5.60.5.n3.vec,
  Importance = TRUE,
  method = "svmRadial",
  trControl = fitControl,
  tuneGrid = svmGrid,
  preProcess = c("center", "scale"),
  metric = "Accuracy"
)
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.1st.4th, V4.5th.6th,
## V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.1st.4th, V4.5th.6th,
## V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.1st.4th, V4.5th.6th,
## V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.1st.4th, V4.5th.6th,
## V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.1st.4th, V4.5th.6th,
## V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.1st.4th, V4.5th.6th,
## V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.1st.4th, V4.5th.6th,
## V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.1st.4th, V4.5th.6th,
## V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.1st.4th, V4.5th.6th,
## V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.1st.4th, V4.5th.6th,
## V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.1st.4th, V4.5th.6th,
## V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.1st.4th, V4.5th.6th,
## V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.1st.4th, V4.5th.6th,
## V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.1st.4th, V4.5th.6th,
## V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.1st.4th, V4.5th.6th,
## V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.10th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool,
## V4.Prof.school
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.10th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool,
## V4.Prof.school
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.10th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool,
## V4.Prof.school
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.10th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool,
## V4.Prof.school
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.10th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool,
## V4.Prof.school
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.10th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool,
## V4.Prof.school
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.10th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool,
## V4.Prof.school
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.10th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool,
## V4.Prof.school
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.10th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool,
## V4.Prof.school
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.10th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool,
## V4.Prof.school
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.10th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool,
## V4.Prof.school
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.10th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool,
## V4.Prof.school
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.10th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool,
## V4.Prof.school
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.10th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool,
## V4.Prof.school
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.10th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool,
## V4.Prof.school
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.1st.4th, V4.5th.6th,
## V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.1st.4th, V4.5th.6th,
## V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.1st.4th, V4.5th.6th,
## V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.1st.4th, V4.5th.6th,
## V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.1st.4th, V4.5th.6th,
## V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.1st.4th, V4.5th.6th,
## V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.1st.4th, V4.5th.6th,
## V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.1st.4th, V4.5th.6th,
## V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.1st.4th, V4.5th.6th,
## V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.1st.4th, V4.5th.6th,
## V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.1st.4th, V4.5th.6th,
## V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.1st.4th, V4.5th.6th,
## V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.1st.4th, V4.5th.6th,
## V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.1st.4th, V4.5th.6th,
## V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.1st.4th, V4.5th.6th,
## V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.1st.4th, V4.5th.6th,
## V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
Adult_TDA_KDE_5.60.5_n3_SvmFit0
## Support Vector Machines with Radial Basis Function Kernel
##
## 13266 samples
## 108 predictor
## 2 classes: ' <=50K', ' >50K'
##
## Pre-processing: centered (108), scaled (108)
## Resampling: Cross-Validated (3 fold)
## Summary of sample sizes: 8844, 8844, 8844
## Resampling results across tuning parameters:
##
## sigma C Accuracy Kappa
## 0.1 0.25 0.7893864 0.368408514
## 0.1 0.50 0.8004674 0.424802870
## 0.1 0.75 0.8049148 0.447616022
## 0.1 1.00 0.8052917 0.455670488
## 0.1 1.25 0.8050656 0.459167321
## 1.0 0.25 0.7561435 0.170952931
## 1.0 0.50 0.7658676 0.240972544
## 1.0 0.75 0.7739334 0.289713208
## 1.0 1.00 0.7785316 0.321958954
## 1.0 1.25 0.7802653 0.338553037
## 10.0 0.25 0.7299864 0.001732385
## 10.0 0.50 0.7324740 0.023796311
## 10.0 0.75 0.7358661 0.052819809
## 10.0 1.00 0.7411428 0.093118355
## 10.0 1.25 0.7421981 0.112321505
##
## Accuracy was used to select the optimal model using the largest value.
## The final values used for the model were sigma = 0.1 and C = 1.
Adult_TDA_KDE_5.60.5_n3_SvmFit0$resample
## Accuracy Kappa Resample
## 1 0.7991859 0.4436653 Fold1
## 2 0.8095884 0.4657166 Fold2
## 3 0.8071009 0.4576296 Fold3
# Keep the per-fold Accuracy column as a one-column data frame; the column is
# selected by name so the extraction does not depend on column position.
ad_tda_kde_5.60.5_n3_svm_fit_re <-
  Adult_TDA_KDE_5.60.5_n3_SvmFit0$resample["Accuracy"]
summary(Adult_TDA_KDE_5.60.5_n3_SvmFit0)
## Length Class Mode
## 1 ksvm S4
#vip(Adult_TDA_KDE_5.60.5_n3_SvmFit0,25) + ggtitle("Adult_TDA_KDE_5.60.5_n3_SvmFit TDA-Assisted SVM")
# Score the test data with the node-3 SVM model.
pred0 <- predict(
  Adult_TDA_KDE_5.60.5_n3_SvmFit0,
  newdata = adult.one_hot_df4Test
)
# Cross-tabulate the predictions against the observed labels (coerced to a
# factor) to assess performance on the test data.
ad_tda_kde_5.60.5_n3_svm_cf0 <- confusionMatrix(
  data = pred0,
  reference = as.factor(adult.one_hot_df4Test$adult_df1)
)
ad_tda_kde_5.60.5_n3_svm_cf0
## Confusion Matrix and Statistics
##
## Reference
## Prediction <=50K >50K
## <=50K 7036 1081
## >50K 380 1271
##
## Accuracy : 0.8504
## 95% CI : (0.8432, 0.8574)
## No Information Rate : 0.7592
## P-Value [Acc > NIR] : < 2.2e-16
##
## Kappa : 0.5446
##
## Mcnemar's Test P-Value : < 2.2e-16
##
## Sensitivity : 0.9488
## Specificity : 0.5404
## Pos Pred Value : 0.8668
## Neg Pred Value : 0.7698
## Prevalence : 0.7592
## Detection Rate : 0.7203
## Detection Prevalence : 0.8310
## Balanced Accuracy : 0.7446
##
## 'Positive' Class : <=50K
##
ad_tda_kde_5.60.5_n3_svm_cf0
## Confusion Matrix and Statistics
##
## Reference
## Prediction <=50K >50K
## <=50K 7036 1081
## >50K 380 1271
##
## Accuracy : 0.8504
## 95% CI : (0.8432, 0.8574)
## No Information Rate : 0.7592
## P-Value [Acc > NIR] : < 2.2e-16
##
## Kappa : 0.5446
##
## Mcnemar's Test P-Value : < 2.2e-16
##
## Sensitivity : 0.9488
## Specificity : 0.5404
## Pos Pred Value : 0.8668
## Neg Pred Value : 0.7698
## Prevalence : 0.7592
## Detection Rate : 0.7203
## Detection Prevalence : 0.8310
## Balanced Accuracy : 0.7446
##
## 'Positive' Class : <=50K
##
ad_tda_kde_5.60.5_n3_svm_cf0$overall
## Accuracy Kappa AccuracyLower AccuracyUpper AccuracyNull
## 8.504300e-01 5.445652e-01 8.432017e-01 8.574486e-01 7.592138e-01
## AccuracyPValue McnemarPValue
## 1.574372e-109 6.450309e-75
# Overall test-set accuracy, kept as a named length-one numeric vector.
ad_tda_kde_5.60.5_n3_svm_cf0_ov_acc <-
  ad_tda_kde_5.60.5_n3_svm_cf0$overall["Accuracy"]
ad_tda_kde_5.60.5_n3_svm_cf0$byClass
## Sensitivity Specificity Pos Pred Value
## 0.9487594 0.5403912 0.8668227
## Neg Pred Value Precision Recall
## 0.7698365 0.8668227 0.9487594
## F1 Prevalence Detection Rate
## 0.9059422 0.7592138 0.7203112
## Detection Prevalence Balanced Accuracy
## 0.8309787 0.7445753
# Precision, recall and F1 (elements 5 to 7 of byClass), selected by name.
ad_tda_kde_5.60.5_n3_svm_cf0_pre_rec_f1 <-
  ad_tda_kde_5.60.5_n3_svm_cf0$byClass[c("Precision", "Recall", "F1")]
###### Conduct initial Bayesian tests of non-tda-assisted SVM vs. tda-assisted SVM classifiers
### 3-fold diff
# Per-fold accuracy difference: ad_svm_fit_re minus the node-3 TDA-assisted SVM
# (ad_svm_fit_re is presumably the non-TDA SVM's fold accuracies; verify where
# it is defined). Positive values mean ad_svm_fit_re scored higher.
# NOTE(review): the subtraction pairs rows by position, not by fold label.
# Confirm both resample tables list the folds in the same order.
diff_tda_kde_5.60.5_svm_n3_3_fold<-(ad_svm_fit_re - ad_tda_kde_5.60.5_n3_svm_fit_re)
diff_tda_kde_5.60.5_svm_n3_3_fold
## Accuracy
## 1 0.02508685
## 2 0.01273324
## 3 0.01640533
## Bayesian Tests 3-fold diff
# Bayesian sign test on the fold-wise differences; -0.01 and 0.01 are
# presumably the ROPE bounds (the result has a probRope element).
bst_tda_kde_5.60.5_svm.n3_3_fold <- BayesianSignTest(
  as.matrix(diff_tda_kde_5.60.5_svm_n3_3_fold),
  -0.01,
  0.01
)
bst_tda_kde_5.60.5_svm.n3_3_fold
## $probLeft
## [1] 0
##
## $probRope
## [1] 0.25
##
## $probRight
## [1] 0.75
# Odds of "left" versus "right" from the Bayesian sign test result.
bst_tda_kde_5.60.5_svm.n3_3_fold_odds.left <- with(
  bst_tda_kde_5.60.5_svm.n3_3_fold,
  probLeft / probRight
)
bst_tda_kde_5.60.5_svm.n3_3_fold_odds.left
## [1] 0
# Bayesian signed-rank test on the fold-wise differences, called with the
# same -0.01 / 0.01 bounds as the sign test.
bsr_tda_kde_5.60.5_svm.n3_3_fold <- BayesianSignedRank(
  as.matrix(diff_tda_kde_5.60.5_svm_n3_3_fold),
  -0.01,
  0.01
)
bsr_tda_kde_5.60.5_svm.n3_3_fold
## $winLeft
## [1] 0
##
## $winRope
## [1] 0.08963333
##
## $winRight
## [1] 0.9103667
# Bayesian Correlated Test
# Applied to the fold-wise accuracy differences with bounds -0.01 / 0.01.
# NOTE(review): the second argument (0.1) is presumably the correlation rho of
# the correlated t-test. The usual heuristic for k-fold CV is rho = 1/k, which
# would be 1/3 for the 3-fold resampling used here. TODO confirm.
bct_tda_kde_5.60.5_svm.n3_3_fold<-correlatedBayesianTtest(as.matrix(diff_tda_kde_5.60.5_svm_n3_3_fold),0.1,-0.01,0.01)
bct_tda_kde_5.60.5_svm.n3_3_fold
## $left
## [1] 0.01097393
##
## $rope
## [1] 0.08723392
##
## $right
## [1] 0.9017921
# ROPE plot of the fold-wise differences against the interval [-0.01, 0.01].
plot(
  rope(
    diff_tda_kde_5.60.5_svm_n3_3_fold,
    range = c(-0.01, 0.01)
  )
)

#BayesFactor
#bf_tda_kde_5.60.5_svm.n3_3_fold = ttestBF(x = as.matrix(diff_tda_kde_5.60.5_svm_n3_3_fold))
#bf_tda_kde_5.60.5_svm.n3_3_fold
# One-sample t-test of the fold-wise accuracy differences (default settings:
# two-sided, null mean of 0). Only three differences are available, so df = 2.
t.test(as.matrix(diff_tda_kde_5.60.5_svm_n3_3_fold))
##
## One Sample t-test
##
## data: as.matrix(diff_tda_kde_5.60.5_svm_n3_3_fold)
## t = 4.935, df = 2, p-value = 0.03869
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
## 0.002316204 0.033834075
## sample estimates:
## mean of x
## 0.01807514
### Test-set accuracy difference: svm_cf_ov_acc minus the node-3 TDA-assisted SVM
diff_tda_kde_5.60.5_svm.n3_test <-
  svm_cf_ov_acc - ad_tda_kde_5.60.5_n3_svm_cf0_ov_acc
diff_tda_kde_5.60.5_svm.n3_test
## Accuracy
## -0.0234439
## Bayesian Tests Test set diff
# Bayesian Sign Test
# NOTE(review): the input is a single test-set accuracy difference, so the
# test runs on one observation. Interpret the probabilities with caution.
bst_tda_kde_5.60.5_svm.n3_test<-BayesianSignTest(as.matrix(diff_tda_kde_5.60.5_svm.n3_test),-0.01,0.01)
bst_tda_kde_5.60.5_svm.n3_test
## $probLeft
## [1] 0.5
##
## $probRope
## [1] 0.5
##
## $probRight
## [1] 0
# Odds Left Bayesian Sign Test
# Ratio probLeft / probRight. probRight is 0 in the result printed for this
# test, so the ratio evaluates to Inf (division by zero).
bst_tda_kde_5.60.5_svm.n3_test_odds.left<-bst_tda_kde_5.60.5_svm.n3_test$probLeft/bst_tda_kde_5.60.5_svm.n3_test$probRight
bst_tda_kde_5.60.5_svm.n3_test_odds.left
## [1] Inf
# Bayesian signed-rank test on the single test-set accuracy difference,
# called with the same -0.01 / 0.01 bounds as the other tests.
bsr_tda_kde_5.60.5_svm.n3_test <- BayesianSignedRank(
  as.matrix(diff_tda_kde_5.60.5_svm.n3_test),
  -0.01,
  0.01
)
bsr_tda_kde_5.60.5_svm.n3_test
## $winLeft
## [1] 0.8407333
##
## $winRope
## [1] 0.1592667
##
## $winRight
## [1] 0
# Bayesian Correlated Test
# NOTE(review): the input is a single test-set difference and every element of
# the printed result is NA. Presumably no spread can be estimated from one
# value; confirm whether this call should be kept.
bct_tda_kde_5.60.5_svm.n3_test<-correlatedBayesianTtest(as.matrix(diff_tda_kde_5.60.5_svm.n3_test),0.1,-0.01,0.01)
bct_tda_kde_5.60.5_svm.n3_test
## $left
## [1] NA
##
## $rope
## [1] NA
##
## $right
## [1] NA
# Rope Plot
#plot(rope(diff_tda_kde_5.60.5_svm.n3_test))
#BayesFactor
#bf_tda_kde_5.60.5_svm.n3_test = ttestBF(x = as.matrix(diff_tda_kde_5.60.5_svm.n3_test)) #bf_tda_kde_5.60.5_svm.n3_test
#t_test
#t.test(as.matrix(diff_tda_kde_5.60.5_svm.n3_test))
##Node4
# Fit a radial-kernel SVM (caret method "svmRadial") on the node-4 TDA/KDE
# training vectors. Predictors are centred and scaled, candidate (sigma, C)
# pairs come from svmGrid, resampling is controlled by fitControl, and the
# winning configuration is chosen by Accuracy.
# NOTE(review): `Importance` is forwarded through train()'s `...`; confirm the
# SVM backend actually uses it.
# NOTE(review): the recorded warnings report zero-variance predictors that
# cannot be scaled; consider whether adding "zv" to preProc is appropriate.
Adult_TDA_KDE_5.60.5_n4_SvmFit0 <- train(
  as.factor(adult_df1) ~ .,
  data = tda.m_kde_adult_5.60.5.n4.vec,
  Importance = TRUE, # TRUE instead of T: T is an ordinary, reassignable variable
  method = "svmRadial",
  trControl = fitControl,
  tuneGrid = svmGrid,
  preProc = c("center", "scale"),
  metric = "Accuracy"
)
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Without.pay, V4.10th, V4.11th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool,
## V4.Prof.school, V14.Holand.Netherlands
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Without.pay, V4.10th, V4.11th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool,
## V4.Prof.school, V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Without.pay, V4.10th, V4.11th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool,
## V4.Prof.school, V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Without.pay, V4.10th, V4.11th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool,
## V4.Prof.school, V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Without.pay, V4.10th, V4.11th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool,
## V4.Prof.school, V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Without.pay, V4.10th, V4.11th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool,
## V4.Prof.school, V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Without.pay, V4.10th, V4.11th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool,
## V4.Prof.school, V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Without.pay, V4.10th, V4.11th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool,
## V4.Prof.school, V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Without.pay, V4.10th, V4.11th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool,
## V4.Prof.school, V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Without.pay, V4.10th, V4.11th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool,
## V4.Prof.school, V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Without.pay, V4.10th, V4.11th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool,
## V4.Prof.school, V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Without.pay, V4.10th, V4.11th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool,
## V4.Prof.school, V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Without.pay, V4.10th, V4.11th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool,
## V4.Prof.school, V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Without.pay, V4.10th, V4.11th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool,
## V4.Prof.school, V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Without.pay, V4.10th, V4.11th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool,
## V4.Prof.school, V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
# Print the node-4 SVM fit: resampling results per (sigma, C) and the selected values
Adult_TDA_KDE_5.60.5_n4_SvmFit0
## Support Vector Machines with Radial Basis Function Kernel
##
## 11795 samples
## 108 predictor
## 2 classes: ' <=50K', ' >50K'
##
## Pre-processing: centered (108), scaled (108)
## Resampling: Cross-Validated (3 fold)
## Summary of sample sizes: 7863, 7864, 7863
## Resampling results across tuning parameters:
##
## sigma C Accuracy Kappa
## 0.1 0.25 0.8176347 0.299483643
## 0.1 0.50 0.8269606 0.376307547
## 0.1 0.75 0.8315389 0.409558299
## 0.1 1.00 0.8331498 0.425270442
## 0.1 1.25 0.8327257 0.431663114
## 1.0 0.25 0.7994916 0.127471144
## 1.0 0.50 0.8068670 0.199677506
## 1.0 0.75 0.8096650 0.245392404
## 1.0 1.00 0.8133952 0.281703866
## 1.0 1.25 0.8136496 0.298986548
## 10.0 0.25 0.7861806 0.001075727
## 10.0 0.50 0.7864350 0.009224312
## 10.0 0.75 0.7880459 0.035149825
## 10.0 1.00 0.7905895 0.071308500
## 10.0 1.25 0.7916916 0.096655028
##
## Accuracy was used to select the optimal model using the largest value.
## The final values used for the model were sigma = 0.1 and C = 1.
# Per-fold Accuracy and Kappa stored in the fit's $resample element
Adult_TDA_KDE_5.60.5_n4_SvmFit0$resample
## Accuracy Kappa Resample
## 1 0.8344354 0.4327856 Fold1
## 2 0.8351564 0.4270883 Fold2
## 3 0.8298576 0.4159374 Fold3
# Keep only the first column of $resample (the per-fold Accuracy); single-bracket
# indexing keeps it as a one-column data frame.
ad_tda_kde_5.60.5_n4_svm_fit_re <- Adult_TDA_KDE_5.60.5_n4_SvmFit0$resample[1]
summary(Adult_TDA_KDE_5.60.5_n4_SvmFit0)
## Length Class Mode
## 1 ksvm S4
#vip(Adult_TDA_KDE_5.60.5_n4_SvmFit0,25) + ggtitle("Adult_TDA_KDE_5.60.5_n_SvmFit TDA-Assited Svm")
# Score the held-out rows in adult.one_hot_df4Test with the node-4 SVM fit
pred0 <- predict(
  Adult_TDA_KDE_5.60.5_n4_SvmFit0,
  newdata = adult.one_hot_df4Test
)
# Compare the predictions with the test labels (adult_df1) in a confusion matrix
ad_tda_kde_5.60.5_n4_svm_cf0 <- confusionMatrix(
  data = pred0,
  as.factor(adult.one_hot_df4Test$adult_df1)
)
ad_tda_kde_5.60.5_n4_svm_cf0
## Confusion Matrix and Statistics
##
## Reference
## Prediction <=50K >50K
## <=50K 7136 1448
## >50K 280 904
##
## Accuracy : 0.8231
## 95% CI : (0.8154, 0.8306)
## No Information Rate : 0.7592
## P-Value [Acc > NIR] : < 2.2e-16
##
## Kappa : 0.4174
##
## Mcnemar's Test P-Value : < 2.2e-16
##
## Sensitivity : 0.9622
## Specificity : 0.3844
## Pos Pred Value : 0.8313
## Neg Pred Value : 0.7635
## Prevalence : 0.7592
## Detection Rate : 0.7305
## Detection Prevalence : 0.8788
## Balanced Accuracy : 0.6733
##
## 'Positive' Class : <=50K
##
# NOTE(review): second print of the same confusion matrix; the output repeats the one printed just before.
ad_tda_kde_5.60.5_n4_svm_cf0
## Confusion Matrix and Statistics
##
## Reference
## Prediction <=50K >50K
## <=50K 7136 1448
## >50K 280 904
##
## Accuracy : 0.8231
## 95% CI : (0.8154, 0.8306)
## No Information Rate : 0.7592
## P-Value [Acc > NIR] : < 2.2e-16
##
## Kappa : 0.4174
##
## Mcnemar's Test P-Value : < 2.2e-16
##
## Sensitivity : 0.9622
## Specificity : 0.3844
## Pos Pred Value : 0.8313
## Neg Pred Value : 0.7635
## Prevalence : 0.7592
## Detection Rate : 0.7305
## Detection Prevalence : 0.8788
## Balanced Accuracy : 0.6733
##
## 'Positive' Class : <=50K
##
# Overall statistics of the confusion matrix (Accuracy, Kappa, accuracy bounds, p-values)
ad_tda_kde_5.60.5_n4_svm_cf0$overall
## Accuracy Kappa AccuracyLower AccuracyUpper AccuracyNull
## 8.230958e-01 4.173615e-01 8.153814e-01 8.306171e-01 7.592138e-01
## AccuracyPValue McnemarPValue
## 6.147212e-53 2.054904e-173
# Store the test-set Accuracy, which is the first named entry of $overall
ad_tda_kde_5.60.5_n4_svm_cf0_ov_acc <-
  ad_tda_kde_5.60.5_n4_svm_cf0$overall["Accuracy"]
ad_tda_kde_5.60.5_n4_svm_cf0$byClass
## Sensitivity Specificity Pos Pred Value
## 0.9622438 0.3843537 0.8313141
## Neg Pred Value Precision Recall
## 0.7635135 0.8313141 0.9622438
## F1 Prevalence Detection Rate
## 0.8920000 0.7592138 0.7305487
## Detection Prevalence Balanced Accuracy
## 0.8787879 0.6732988
# Precision, Recall and F1 are entries 5 to 7 of $byClass; select them by name
ad_tda_kde_5.60.5_n4_svm_cf0_pre_rec_f1 <-
  ad_tda_kde_5.60.5_n4_svm_cf0$byClass[c("Precision", "Recall", "F1")]
###### Conduct initial Bayesian tests of non-tda-assisted SVM vs. tda-assisted SVM classifiers
### 3-fold difference
# Per-fold accuracy of ad_svm_fit_re minus that of the node-4 TDA/KDE SVM
# (ad_svm_fit_re presumably holds the non-TDA SVM's fold accuracies -- confirm).
diff_tda_kde_5.60.5_svm_n4_3_fold <-
  ad_svm_fit_re - ad_tda_kde_5.60.5_n4_svm_fit_re
diff_tda_kde_5.60.5_svm_n4_3_fold
## Accuracy
## 1 -0.010162663
## 2 -0.012834785
## 3 -0.006351393
## Bayesian tests on the 3-fold differences
# Bayesian sign test with bounds -0.01 and 0.01
bst_tda_kde_5.60.5_svm.n4_3_fold <- BayesianSignTest(
  as.matrix(diff_tda_kde_5.60.5_svm_n4_3_fold),
  -0.01,
  0.01
)
bst_tda_kde_5.60.5_svm.n4_3_fold
## $probLeft
## [1] 0.5
##
## $probRope
## [1] 0.5
##
## $probRight
## [1] 0
# Odds of "left" versus "right" from the Bayesian sign test (probLeft / probRight).
# In the recorded run probRight is 0, so this ratio evaluates to Inf.
bst_tda_kde_5.60.5_svm.n4_3_fold_odds.left <-
  bst_tda_kde_5.60.5_svm.n4_3_fold[["probLeft"]] /
  bst_tda_kde_5.60.5_svm.n4_3_fold[["probRight"]]
bst_tda_kde_5.60.5_svm.n4_3_fold_odds.left
## [1] Inf
# Bayesian signed-rank test on the 3-fold differences, bounds -0.01 and 0.01
bsr_tda_kde_5.60.5_svm.n4_3_fold <- BayesianSignedRank(
  as.matrix(diff_tda_kde_5.60.5_svm_n4_3_fold),
  -0.01,
  0.01
)
bsr_tda_kde_5.60.5_svm.n4_3_fold
## $winLeft
## [1] 0.3290667
##
## $winRope
## [1] 0.6709333
##
## $winRight
## [1] 0
# Correlated Bayesian t-test on the node-4 fold differences, with the bounds
# -0.01 and 0.01 passed as the last two arguments.
# NOTE(review): the second argument (0.1) is presumably the fold correlation
# rho; for k-fold CV it is commonly set to 1/k (1/3 for three folds) -- confirm
# against the definition of correlatedBayesianTtest before relying on it.
bct_tda_kde_5.60.5_svm.n4_3_fold <- correlatedBayesianTtest(
  as.matrix(diff_tda_kde_5.60.5_svm_n4_3_fold),
  0.1,
  -0.01,
  0.01
)
bct_tda_kde_5.60.5_svm.n4_3_fold
## $left
## [1] 0.46476
##
## $rope
## [1] 0.5293185
##
## $right
## [1] 0.005921444
# ROPE plot of the node-4 fold differences over the interval [-0.01, 0.01]
plot(
  rope(diff_tda_kde_5.60.5_svm_n4_3_fold, c(-0.01, 0.01))
)

# BayesFactor (currently disabled)
#bf_tda_kde_5.60.5_svm.n4_3_fold = ttestBF(x = as.matrix(diff_tda_kde_5.60.5_svm_n4_3_fold))
#bf_tda_kde_5.60.5_svm.n4_3_fold
# One-sample t-test of the fold differences (default null: mean equal to 0)
t.test(
  as.matrix(diff_tda_kde_5.60.5_svm_n4_3_fold)
)
##
## One Sample t-test
##
## data: as.matrix(diff_tda_kde_5.60.5_svm_n4_3_fold)
## t = -5.2004, df = 2, p-value = 0.03504
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
## -0.01787709 -0.00168880
## sample estimates:
## mean of x
## -0.009782947
### Test-set difference
# svm_cf_ov_acc minus the node-4 TDA/KDE SVM test accuracy
# (svm_cf_ov_acc is presumably the non-TDA SVM's test accuracy -- confirm).
diff_tda_kde_5.60.5_svm.n4_test <-
  svm_cf_ov_acc - ad_tda_kde_5.60.5_n4_svm_cf0_ov_acc
diff_tda_kde_5.60.5_svm.n4_test
## Accuracy
## 0.003890254
## Bayesian tests on the test-set difference
# Bayesian sign test with bounds -0.01 and 0.01
bst_tda_kde_5.60.5_svm.n4_test <- BayesianSignTest(
  as.matrix(diff_tda_kde_5.60.5_svm.n4_test),
  -0.01,
  0.01
)
bst_tda_kde_5.60.5_svm.n4_test
## $probLeft
## [1] 0
##
## $probRope
## [1] 1
##
## $probRight
## [1] 0
# Odds of "left" versus "right" from the Bayesian sign test (probLeft / probRight).
# In the recorded run both probabilities are 0, so this is 0 / 0 and yields NaN.
bst_tda_kde_5.60.5_svm.n4_test_odds.left <-
  bst_tda_kde_5.60.5_svm.n4_test[["probLeft"]] /
  bst_tda_kde_5.60.5_svm.n4_test[["probRight"]]
bst_tda_kde_5.60.5_svm.n4_test_odds.left
## [1] NaN
# Bayesian signed-rank test on the test-set difference, bounds -0.01 and 0.01
bsr_tda_kde_5.60.5_svm.n4_test <- BayesianSignedRank(
  as.matrix(diff_tda_kde_5.60.5_svm.n4_test),
  -0.01,
  0.01
)
bsr_tda_kde_5.60.5_svm.n4_test
## $winLeft
## [1] 0
##
## $winRope
## [1] 1
##
## $winRight
## [1] 0
# Correlated Bayesian t-test on the test-set difference.
# The recorded run returns NA for left/rope/right; the input is a single
# difference value (NOTE(review): presumably no variance can be estimated from
# one observation -- confirm against correlatedBayesianTtest).
bct_tda_kde_5.60.5_svm.n4_test <- correlatedBayesianTtest(
  as.matrix(diff_tda_kde_5.60.5_svm.n4_test),
  0.1,
  -0.01,
  0.01
)
bct_tda_kde_5.60.5_svm.n4_test
## $left
## [1] NA
##
## $rope
## [1] NA
##
## $right
## [1] NA
# Rope Plot
#plot(rope(diff_tda_kde_5.60.5_svm.n4_test))
#BayesFactor
#bf_tda_kde_5.60.5_svm.n4_test = ttestBF(x = as.matrix(diff_tda_kde_5.60.5_svm.n4_test))
#bf_tda_kde_5.60.5_svm.n4_test
#t_test
#t.test(as.matrix(diff_tda_kde_5.60.5_svm.n4_test))
##Node5
# Fit a radial-kernel SVM (caret method "svmRadial") for node 5. Predictors are
# centred and scaled, candidate (sigma, C) pairs come from svmGrid, resampling
# is controlled by fitControl, and the winning configuration is chosen by
# Accuracy.
# The training data is the node-5 vector set; the call previously pointed at
# the node-3 set (tda.m_kde_adult_5.60.5.n3.vec), so the "n5" model was not
# trained on node-5 data.
# NOTE(review): assumes tda.m_kde_adult_5.60.5.n5.vec exists, following the
# naming used for the Node4 fit; the echoed warnings/output after this call
# were recorded before this change and need to be regenerated.
# NOTE(review): `Importance` is forwarded through train()'s `...`; confirm the
# SVM backend actually uses it.
Adult_TDA_KDE_5.60.5_n5_SvmFit0 <- train(
  as.factor(adult_df1) ~ .,
  data = tda.m_kde_adult_5.60.5.n5.vec,
  Importance = TRUE, # TRUE instead of T: T is an ordinary, reassignable variable
  method = "svmRadial",
  trControl = fitControl,
  tuneGrid = svmGrid,
  preProc = c("center", "scale"),
  metric = "Accuracy"
)
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.1st.4th, V4.5th.6th,
## V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.1st.4th, V4.5th.6th,
## V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.1st.4th, V4.5th.6th,
## V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.1st.4th, V4.5th.6th,
## V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.1st.4th, V4.5th.6th,
## V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.1st.4th, V4.5th.6th,
## V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.1st.4th, V4.5th.6th,
## V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.1st.4th, V4.5th.6th,
## V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.1st.4th, V4.5th.6th,
## V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.1st.4th, V4.5th.6th,
## V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.1st.4th, V4.5th.6th,
## V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.1st.4th, V4.5th.6th,
## V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.1st.4th, V4.5th.6th,
## V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.1st.4th, V4.5th.6th,
## V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.1st.4th, V4.5th.6th,
## V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.10th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool,
## V4.Prof.school
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.10th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool,
## V4.Prof.school
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.10th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool,
## V4.Prof.school
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.10th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool,
## V4.Prof.school
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.10th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool,
## V4.Prof.school
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.10th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool,
## V4.Prof.school
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.10th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool,
## V4.Prof.school
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.10th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool,
## V4.Prof.school
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.10th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool,
## V4.Prof.school
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.10th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool,
## V4.Prof.school
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.10th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool,
## V4.Prof.school
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.10th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool,
## V4.Prof.school
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.10th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool,
## V4.Prof.school
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.10th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool,
## V4.Prof.school
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.10th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool,
## V4.Prof.school
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.1st.4th, V4.5th.6th,
## V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.1st.4th, V4.5th.6th,
## V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.1st.4th, V4.5th.6th,
## V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.1st.4th, V4.5th.6th,
## V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.1st.4th, V4.5th.6th,
## V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.1st.4th, V4.5th.6th,
## V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.1st.4th, V4.5th.6th,
## V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.1st.4th, V4.5th.6th,
## V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.1st.4th, V4.5th.6th,
## V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.1st.4th, V4.5th.6th,
## V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.1st.4th, V4.5th.6th,
## V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.1st.4th, V4.5th.6th,
## V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.1st.4th, V4.5th.6th,
## V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.1st.4th, V4.5th.6th,
## V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.1st.4th, V4.5th.6th,
## V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.1st.4th, V4.5th.6th,
## V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
# Print the fitted TDA-assisted SVM: resampling setup, accuracy/kappa for each
# (sigma, C) candidate, and the tuning values that were selected.
Adult_TDA_KDE_5.60.5_n5_SvmFit0
## Support Vector Machines with Radial Basis Function Kernel
##
## 13266 samples
## 108 predictor
## 2 classes: ' <=50K', ' >50K'
##
## Pre-processing: centered (108), scaled (108)
## Resampling: Cross-Validated (3 fold)
## Summary of sample sizes: 8844, 8844, 8844
## Resampling results across tuning parameters:
##
## sigma C Accuracy Kappa
## 0.1 0.25 0.7899141 0.374262485
## 0.1 0.50 0.8003166 0.428756800
## 0.1 0.75 0.8029549 0.446138492
## 0.1 1.00 0.8032564 0.454106944
## 0.1 1.25 0.8034826 0.460489464
## 1.0 0.25 0.7546359 0.163067046
## 1.0 0.50 0.7671491 0.244132494
## 1.0 0.75 0.7748379 0.291564920
## 1.0 1.00 0.7780793 0.319779932
## 1.0 1.25 0.7801146 0.338322308
## 10.0 0.25 0.7298357 0.000919086
## 10.0 0.50 0.7327001 0.025480097
## 10.0 0.75 0.7363938 0.055986804
## 10.0 1.00 0.7418212 0.097166193
## 10.0 1.25 0.7434796 0.118338862
##
## Accuracy was used to select the optimal model using the largest value.
## The final values used for the model were sigma = 0.1 and C = 1.25.
# Per-fold accuracy and kappa of the selected model (one row per CV fold).
Adult_TDA_KDE_5.60.5_n5_SvmFit0$resample
## Accuracy Kappa Resample
## 1 0.8012212 0.4547974 Fold1
## 2 0.8043872 0.4637866 Fold3
## 3 0.8048394 0.4628844 Fold2
# Keep only the per-fold Accuracy column (a one-column data frame) so it can be
# subtracted fold-by-fold from another model's accuracies later on.
# NOTE(review): rows come in caret's resample order (Fold1, Fold3, Fold2 in the
# printout above), not sorted by fold -- confirm the model it is compared with
# lists its folds in the same order before pairing them row by row.
ad_tda_kde_5.60.5_n5_svm_fit_re <-
  Adult_TDA_KDE_5.60.5_n5_SvmFit0$resample["Accuracy"]
# summary() on this fit only reports length/class/mode of the final ksvm object.
summary(Adult_TDA_KDE_5.60.5_n5_SvmFit0)
## Length Class Mode
## 1 ksvm S4
# Variable-importance plot for this fit (left disabled):
#vip(Adult_TDA_KDE_5.60.5_n5_SvmFit0,25) + ggtitle("Adult_TDA_KDE_5.60.5_n5_SvmFit TDA-Assited Svm")
# Score the held-out test rows with the TDA-assisted SVM fit.
pred0 <- predict(
  Adult_TDA_KDE_5.60.5_n5_SvmFit0,
  newdata = adult.one_hot_df4Test
)
# Cross-tabulate the predicted classes against the true test labels and derive
# accuracy, kappa and the per-class statistics.
ad_tda_kde_5.60.5_n5_svm_cf0 <- confusionMatrix(
  data = pred0,
  reference = as.factor(adult.one_hot_df4Test$adult_df1)
)
ad_tda_kde_5.60.5_n5_svm_cf0
## Confusion Matrix and Statistics
##
## Reference
## Prediction <=50K >50K
## <=50K 7014 1035
## >50K 402 1317
##
## Accuracy : 0.8529
## 95% CI : (0.8457, 0.8599)
## No Information Rate : 0.7592
## P-Value [Acc > NIR] : < 2.2e-16
##
## Kappa : 0.5569
##
## Mcnemar's Test P-Value : < 2.2e-16
##
## Sensitivity : 0.9458
## Specificity : 0.5599
## Pos Pred Value : 0.8714
## Neg Pred Value : 0.7661
## Prevalence : 0.7592
## Detection Rate : 0.7181
## Detection Prevalence : 0.8240
## Balanced Accuracy : 0.7529
##
## 'Positive' Class : <=50K
##
# Prints the same confusion matrix a second time; the object has not changed
# since the previous print, so the output below repeats the one above.
ad_tda_kde_5.60.5_n5_svm_cf0
## Confusion Matrix and Statistics
##
## Reference
## Prediction <=50K >50K
## <=50K 7014 1035
## >50K 402 1317
##
## Accuracy : 0.8529
## 95% CI : (0.8457, 0.8599)
## No Information Rate : 0.7592
## P-Value [Acc > NIR] : < 2.2e-16
##
## Kappa : 0.5569
##
## Mcnemar's Test P-Value : < 2.2e-16
##
## Sensitivity : 0.9458
## Specificity : 0.5599
## Pos Pred Value : 0.8714
## Neg Pred Value : 0.7661
## Prevalence : 0.7592
## Detection Rate : 0.7181
## Detection Prevalence : 0.8240
## Balanced Accuracy : 0.7529
##
## 'Positive' Class : <=50K
##
# Overall test-set statistics as a named numeric vector (accuracy, kappa,
# accuracy confidence bounds, null accuracy and the two p-values).
ad_tda_kde_5.60.5_n5_svm_cf0$overall
## Accuracy Kappa AccuracyLower AccuracyUpper AccuracyNull
## 8.528870e-01 5.569159e-01 8.457061e-01 8.598567e-01 7.592138e-01
## AccuracyPValue McnemarPValue
## 8.774525e-116 2.093335e-62
# Store the test-set accuracy (first element of $overall, named "Accuracy")
# for the test-set comparison against the other SVM.
ad_tda_kde_5.60.5_n5_svm_cf0_ov_acc <-
  ad_tda_kde_5.60.5_n5_svm_cf0$overall["Accuracy"]
# Per-class statistics, computed with ' <=50K' as the positive class.
ad_tda_kde_5.60.5_n5_svm_cf0$byClass
## Sensitivity Specificity Pos Pred Value
## 0.9457929 0.5599490 0.8714126
## Neg Pred Value Precision Recall
## 0.7661431 0.8714126 0.9457929
## F1 Prevalence Detection Rate
## 0.9070805 0.7592138 0.7180590
## Detection Prevalence Balanced Accuracy
## 0.8240172 0.7528709
# Keep precision, recall and F1 (elements 5 to 7 of $byClass).
ad_tda_kde_5.60.5_n5_svm_cf0_pre_rec_f1 <-
  ad_tda_kde_5.60.5_n5_svm_cf0$byClass[c("Precision", "Recall", "F1")]
###### Conduct initial Bayesian tests of non-tda-assisted SVM vs. tda-assisted SVM classifiers
### 3-fold diff
# Row-wise difference of the two one-column accuracy data frames:
# ad_svm_fit_re (presumably the non-TDA SVM folds -- verify where it is defined)
# minus the TDA-assisted SVM folds. Positive values favour the first model.
diff_tda_kde_5.60.5_svm_n5_3_fold <-
  ad_svm_fit_re - ad_tda_kde_5.60.5_n5_svm_fit_re
diff_tda_kde_5.60.5_svm_n5_3_fold
## Accuracy
## 1 0.02305157
## 2 0.01793451
## 3 0.01866675
## Bayesian Tests 3-fold diff
# Bayesian Sign Test
# The two trailing numbers are presumably the lower/upper ROPE bounds
# (+/- 0.01 accuracy) -- confirm against the BayesianSignTest definition.
bst_tda_kde_5.60.5_svm.n5_3_fold <- BayesianSignTest(
  as.matrix(diff_tda_kde_5.60.5_svm_n5_3_fold),
  -0.01,
  0.01
)
bst_tda_kde_5.60.5_svm.n5_3_fold
## $probLeft
## [1] 0
##
## $probRope
## [1] 0.25
##
## $probRight
## [1] 0.75
# Odds Left Bayesian Sign Test
# Ratio of the sign test's left probability to its right probability.
bst_tda_kde_5.60.5_svm.n5_3_fold_odds.left <- with(
  bst_tda_kde_5.60.5_svm.n5_3_fold,
  probLeft / probRight
)
bst_tda_kde_5.60.5_svm.n5_3_fold_odds.left
## [1] 0
# Bayesian Signed Rank Test
# Same fold-wise differences and the same -0.01 / 0.01 bounds as the sign test.
bsr_tda_kde_5.60.5_svm.n5_3_fold <- BayesianSignedRank(
  as.matrix(diff_tda_kde_5.60.5_svm_n5_3_fold),
  -0.01,
  0.01
)
bsr_tda_kde_5.60.5_svm.n5_3_fold
## $winLeft
## [1] 0
##
## $winRope
## [1] 0.09083333
##
## $winRight
## [1] 0.9091667
# Bayesian Correlated Test
# NOTE(review): the second argument (0.1) is presumably the correlation between
# folds; for k-fold CV this is commonly set to 1/k, which would be 1/3 for the
# 3-fold resampling used here -- confirm against the function definition.
bct_tda_kde_5.60.5_svm.n5_3_fold <- correlatedBayesianTtest(
  as.matrix(diff_tda_kde_5.60.5_svm_n5_3_fold),
  0.1,
  -0.01,
  0.01
)
bct_tda_kde_5.60.5_svm.n5_3_fold
## $left
## [1] 0.00189468
##
## $rope
## [1] 0.01466324
##
## $right
## [1] 0.9834421
# Rope Plot
# Plot the share of the fold-wise differences that falls inside [-0.01, 0.01].
plot(
  rope(diff_tda_kde_5.60.5_svm_n5_3_fold, c(-0.01, 0.01))
)

#BayesFactor
#bf_tda_kde_5.60.5_svm.n5_3_fold = ttestBF(x = as.matrix(diff_tda_kde_5.60.5_svm_n5_3_fold))
#bf_tda_kde_5.60.5_svm.n5_3_fold
#t_test
# Frequentist check: one-sample t-test of the fold-wise differences against a
# true mean of zero (only three observations, so df = 2).
t.test(as.matrix(diff_tda_kde_5.60.5_svm_n5_3_fold))
##
## One Sample t-test
##
## data: as.matrix(diff_tda_kde_5.60.5_svm_n5_3_fold)
## t = 12.446, df = 2, p-value = 0.006394
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
## 0.01300996 0.02675859
## sample estimates:
## mean of x
## 0.01988428
# Tuning grid for the single-hidden-layer network: every combination of hidden
# layer size (2, 3, 5, 7) and weight decay (0.3, 0.5, 0.7) -- 12 candidates.
nn1Grid <- expand.grid(size = c(2, 3, 5, 7), decay = c(0.3, 0.5, 0.7))
#Neural Network
# Fit an nnet classifier on the one-hot encoded training data, evaluating each
# grid row with the resampling scheme in fitControl and selecting by Accuracy.
# `Importance` is spelled out as TRUE rather than T, because T is an ordinary
# variable that can be reassigned.
# NOTE(review): `Importance` is forwarded through `...` to the underlying model
# function -- confirm that nnet actually uses it.
adultNn1Fit <- train(
  as.factor(adult_df1) ~ .,
  data = adult.one_hot_df4Train,
  Importance = TRUE,
  method = "nnet",
  trControl = fitControl,
  tuneGrid = nn1Grid,
  metric = "Accuracy"
)
## # weights: 221
## initial value 14914.247485
## iter 10 value 8387.827451
## iter 10 value 8387.827445
## final value 8387.827314
## converged
## # weights: 331
## initial value 8934.347194
## iter 10 value 7808.621355
## iter 20 value 7695.138728
## iter 30 value 7694.826011
## iter 40 value 7694.718736
## iter 50 value 7690.104046
## iter 60 value 7680.254181
## iter 70 value 7659.971283
## iter 80 value 7653.768876
## iter 90 value 7645.410266
## iter 100 value 7525.511899
## final value 7525.511899
## stopped after 100 iterations
## # weights: 551
## initial value 15270.611564
## iter 10 value 8317.553863
## iter 20 value 8140.854287
## iter 30 value 7730.047655
## iter 40 value 7684.776575
## iter 50 value 7666.542234
## iter 60 value 7643.445407
## iter 70 value 7581.690526
## iter 80 value 7543.649568
## iter 90 value 7505.937117
## iter 100 value 7363.796783
## final value 7363.796783
## stopped after 100 iterations
## # weights: 771
## initial value 16294.703309
## iter 10 value 7786.612502
## iter 20 value 7685.449279
## iter 30 value 7671.905931
## iter 40 value 7660.658740
## iter 50 value 7646.353470
## iter 60 value 7631.106499
## iter 70 value 7592.530816
## iter 80 value 7542.472357
## iter 90 value 7504.258442
## iter 100 value 7488.577910
## final value 7488.577910
## stopped after 100 iterations
## # weights: 221
## initial value 15499.745841
## iter 10 value 8388.053418
## iter 20 value 8388.042862
## iter 20 value 8388.042799
## iter 20 value 8388.042793
## final value 8388.042793
## converged
## # weights: 331
## initial value 12875.025503
## iter 10 value 8267.489254
## iter 20 value 7708.902559
## iter 30 value 7707.959692
## iter 40 value 7683.777898
## iter 50 value 7571.977732
## iter 60 value 7477.751846
## iter 70 value 7471.083962
## iter 80 value 7397.102950
## iter 90 value 7372.392331
## iter 100 value 7341.382346
## final value 7341.382346
## stopped after 100 iterations
## # weights: 551
## initial value 10967.055006
## iter 10 value 8333.175880
## iter 20 value 8323.905758
## iter 30 value 8113.213696
## iter 40 value 7573.865713
## iter 50 value 7543.210403
## iter 60 value 7516.513403
## iter 70 value 7054.955298
## iter 80 value 6544.396974
## iter 90 value 6244.004392
## iter 100 value 5864.693839
## final value 5864.693839
## stopped after 100 iterations
## # weights: 771
## initial value 8557.405771
## iter 10 value 7721.896197
## iter 20 value 7703.675682
## iter 30 value 7679.899357
## iter 40 value 7591.105270
## iter 50 value 7579.772570
## iter 60 value 7511.924843
## iter 70 value 7469.908575
## iter 80 value 7464.322207
## iter 90 value 7330.304185
## iter 100 value 6602.928255
## final value 6602.928255
## stopped after 100 iterations
## # weights: 221
## initial value 10796.872751
## iter 10 value 8386.359593
## iter 20 value 7752.958524
## iter 30 value 7722.945264
## iter 40 value 7663.040625
## iter 50 value 7621.125211
## iter 60 value 7607.043979
## iter 70 value 7582.395552
## iter 80 value 7400.172667
## iter 90 value 7215.413595
## iter 100 value 6615.114583
## final value 6615.114583
## stopped after 100 iterations
## # weights: 331
## initial value 11567.665716
## iter 10 value 8318.682666
## iter 20 value 7836.418097
## iter 30 value 7826.643549
## iter 40 value 7758.193283
## iter 50 value 7735.975032
## iter 60 value 7681.647350
## iter 70 value 7648.037579
## iter 80 value 7548.353306
## iter 90 value 7396.481260
## iter 100 value 6548.994585
## final value 6548.994585
## stopped after 100 iterations
## # weights: 551
## initial value 8684.052730
## iter 10 value 8101.316344
## iter 20 value 7719.153662
## iter 30 value 7705.102736
## iter 40 value 7328.030580
## iter 50 value 6936.281710
## iter 60 value 6904.159106
## iter 70 value 6503.108413
## iter 80 value 6083.116519
## iter 90 value 5728.454514
## iter 100 value 5540.287574
## final value 5540.287574
## stopped after 100 iterations
## # weights: 771
## initial value 16698.339714
## iter 10 value 7809.493522
## iter 20 value 7542.760966
## iter 30 value 7529.377824
## iter 40 value 7460.839476
## iter 50 value 7189.834582
## iter 60 value 6990.211510
## iter 70 value 6981.244136
## iter 80 value 6691.671055
## iter 90 value 5748.327853
## iter 100 value 5307.371888
## final value 5307.371888
## stopped after 100 iterations
## # weights: 221
## initial value 9007.370545
## iter 10 value 8387.893793
## iter 20 value 8370.412757
## iter 30 value 8355.144558
## iter 40 value 8062.565654
## iter 50 value 7772.953460
## iter 60 value 7374.245172
## iter 70 value 6661.141222
## iter 80 value 6248.936301
## iter 90 value 6128.051239
## iter 100 value 5992.725327
## final value 5992.725327
## stopped after 100 iterations
## # weights: 331
## initial value 10951.458356
## iter 10 value 8332.325545
## iter 20 value 7823.972465
## iter 30 value 7780.903226
## iter 40 value 7745.630955
## iter 50 value 7739.642084
## iter 60 value 7738.543237
## iter 70 value 7728.614195
## iter 80 value 7657.519923
## iter 90 value 7538.164705
## iter 100 value 7517.264919
## final value 7517.264919
## stopped after 100 iterations
## # weights: 551
## initial value 9491.348203
## iter 10 value 8233.914449
## iter 20 value 7970.146725
## iter 30 value 7800.374269
## iter 40 value 7796.272701
## iter 50 value 7787.750936
## iter 60 value 7760.391994
## iter 70 value 7616.086201
## iter 80 value 7455.184358
## iter 90 value 7409.753912
## iter 100 value 7343.103830
## final value 7343.103830
## stopped after 100 iterations
## # weights: 771
## initial value 8435.606022
## iter 10 value 8314.294684
## iter 20 value 7834.906866
## iter 30 value 7617.521283
## iter 40 value 7601.464600
## iter 50 value 7596.253449
## iter 60 value 7456.076859
## iter 70 value 7431.776587
## iter 80 value 7160.344816
## iter 90 value 6908.360433
## iter 100 value 6139.029521
## final value 6139.029521
## stopped after 100 iterations
## # weights: 221
## initial value 12984.644054
## iter 10 value 8565.375631
## iter 20 value 7775.349984
## iter 30 value 7774.925575
## iter 40 value 7756.925989
## iter 50 value 7608.131861
## iter 60 value 7550.067860
## iter 70 value 7419.373239
## iter 80 value 7071.728904
## iter 90 value 6310.673260
## iter 100 value 5698.521509
## final value 5698.521509
## stopped after 100 iterations
## # weights: 331
## initial value 13843.624590
## iter 10 value 8288.530559
## iter 20 value 7665.333543
## iter 30 value 7581.870900
## iter 30 value 7581.870885
## final value 7581.870885
## converged
## # weights: 551
## initial value 13089.968565
## iter 10 value 8044.819335
## iter 20 value 7845.610189
## iter 30 value 7681.548660
## iter 40 value 7618.996656
## iter 50 value 7572.067203
## iter 60 value 7540.338331
## iter 70 value 7522.611949
## iter 80 value 7311.586164
## iter 90 value 6996.529531
## iter 100 value 6655.909450
## final value 6655.909450
## stopped after 100 iterations
## # weights: 771
## initial value 9765.548507
## iter 10 value 8212.363355
## iter 20 value 7824.273923
## iter 30 value 7772.815641
## iter 40 value 7751.053394
## iter 50 value 7732.036973
## iter 60 value 7668.934206
## iter 70 value 7646.818132
## iter 80 value 7601.303018
## iter 90 value 7493.928824
## iter 100 value 7418.581645
## final value 7418.581645
## stopped after 100 iterations
## # weights: 221
## initial value 8849.572798
## iter 10 value 7970.702123
## iter 20 value 7816.401644
## iter 30 value 7812.858798
## iter 40 value 7757.905728
## iter 50 value 7577.625843
## iter 60 value 7520.045779
## iter 70 value 7456.798749
## iter 80 value 7313.957474
## iter 90 value 7164.603052
## iter 100 value 6121.989153
## final value 6121.989153
## stopped after 100 iterations
## # weights: 331
## initial value 9379.607285
## iter 10 value 8388.421617
## iter 20 value 8369.172128
## iter 30 value 8332.031073
## iter 40 value 8310.905049
## iter 50 value 7822.778864
## iter 60 value 7471.508794
## iter 70 value 6777.512118
## iter 80 value 6175.471431
## iter 90 value 5609.965802
## iter 100 value 5282.472146
## final value 5282.472146
## stopped after 100 iterations
## # weights: 551
## initial value 14172.587821
## iter 10 value 8161.102118
## iter 20 value 7841.393426
## iter 30 value 7752.389698
## iter 40 value 7748.583088
## iter 50 value 7740.483911
## iter 60 value 7585.009885
## iter 70 value 7548.991549
## iter 80 value 7533.416049
## iter 90 value 7523.933185
## iter 100 value 7504.973765
## final value 7504.973765
## stopped after 100 iterations
## # weights: 771
## initial value 8440.807063
## iter 10 value 7775.610906
## iter 20 value 7762.108417
## iter 30 value 7747.050717
## iter 40 value 7742.501701
## iter 50 value 7731.745854
## iter 60 value 7619.866913
## iter 70 value 7569.581747
## iter 80 value 7563.850584
## iter 90 value 7465.587301
## iter 100 value 7417.631348
## final value 7417.631348
## stopped after 100 iterations
## # weights: 221
## initial value 11368.699999
## final value 8389.250891
## converged
## # weights: 331
## initial value 14067.553992
## iter 10 value 8389.251061
## final value 8389.250871
## converged
## # weights: 551
## initial value 12931.852167
## iter 10 value 8048.585914
## iter 20 value 8033.248236
## final value 8032.929443
## converged
## # weights: 771
## initial value 9359.205323
## iter 10 value 8341.182609
## iter 20 value 7764.602072
## iter 30 value 7656.621439
## iter 40 value 7619.348444
## iter 50 value 7488.368833
## iter 60 value 7334.702048
## iter 70 value 7046.359244
## iter 80 value 6330.604012
## iter 90 value 5579.032428
## iter 100 value 5532.368850
## final value 5532.368850
## stopped after 100 iterations
## # weights: 221
## initial value 12741.268962
## iter 10 value 8381.909271
## iter 20 value 8039.954343
## iter 30 value 7780.214751
## iter 40 value 7772.950439
## iter 50 value 7769.281228
## final value 7766.136267
## converged
## # weights: 331
## initial value 8812.657497
## iter 10 value 8243.470361
## iter 20 value 7861.344462
## iter 30 value 7770.690723
## iter 40 value 7768.443349
## iter 50 value 7762.604336
## iter 60 value 7756.711841
## iter 70 value 7686.251124
## iter 80 value 7603.987494
## iter 90 value 7595.520572
## iter 100 value 7485.892896
## final value 7485.892896
## stopped after 100 iterations
## # weights: 551
## initial value 12214.039002
## iter 10 value 8257.195400
## iter 20 value 7793.433615
## iter 30 value 7607.795668
## iter 40 value 7564.642816
## iter 50 value 7558.264906
## iter 60 value 7556.097129
## iter 70 value 7553.282434
## iter 80 value 7550.531224
## iter 90 value 7550.152798
## iter 100 value 7549.331992
## final value 7549.331992
## stopped after 100 iterations
## # weights: 771
## initial value 9358.061901
## iter 10 value 8133.967595
## iter 20 value 7735.799556
## iter 30 value 7525.124606
## iter 40 value 7504.565786
## iter 50 value 7489.430183
## iter 60 value 7470.148511
## iter 70 value 7451.607121
## iter 80 value 7418.243338
## iter 90 value 7402.781472
## iter 100 value 7391.955274
## final value 7391.955274
## stopped after 100 iterations
## # weights: 221
## initial value 12787.513808
## iter 10 value 8389.608822
## iter 20 value 7785.227973
## iter 30 value 7778.267470
## iter 40 value 7778.233084
## iter 50 value 7774.657702
## iter 50 value 7774.657691
## final value 7774.657691
## converged
## # weights: 331
## initial value 12243.635329
## iter 10 value 8419.227557
## iter 20 value 8389.265532
## iter 30 value 8361.967251
## iter 40 value 7784.554187
## iter 50 value 7761.896107
## iter 60 value 7743.986618
## iter 70 value 7587.812085
## iter 80 value 7548.866412
## iter 90 value 7391.411653
## iter 100 value 7129.129194
## final value 7129.129194
## stopped after 100 iterations
## # weights: 551
## initial value 12141.619444
## iter 10 value 8235.199570
## iter 20 value 7998.059485
## iter 30 value 7792.905576
## iter 40 value 7775.919897
## iter 50 value 7768.861347
## iter 60 value 7754.033756
## iter 70 value 7732.614733
## iter 80 value 7699.392212
## iter 90 value 7667.849845
## iter 100 value 7572.755801
## final value 7572.755801
## stopped after 100 iterations
## # weights: 771
## initial value 15261.419720
## iter 10 value 8364.038019
## iter 20 value 8125.621020
## iter 30 value 7808.378483
## iter 40 value 7628.274448
## iter 50 value 7594.937758
## iter 60 value 7586.909450
## iter 70 value 7576.374040
## iter 80 value 7571.920047
## iter 90 value 7446.012084
## iter 100 value 7349.163902
## final value 7349.163902
## stopped after 100 iterations
## # weights: 331
## initial value 30004.133921
## iter 10 value 11969.553227
## iter 20 value 11628.553351
## iter 30 value 11423.960902
## iter 40 value 11376.226663
## iter 50 value 11373.077231
## iter 60 value 11338.672213
## iter 70 value 11163.310233
## iter 80 value 11010.896927
## iter 90 value 10524.613644
## iter 100 value 10226.328138
## final value 10226.328138
## stopped after 100 iterations
### Test set diff
# Single test-set accuracy difference: svm_cf_ov_acc minus the TDA-assisted SVM
# accuracy. A negative value means the TDA-assisted SVM scored higher.
diff_tda_kde_5.60.5_svm.n5_test <-
  svm_cf_ov_acc - ad_tda_kde_5.60.5_n5_svm_cf0_ov_acc
diff_tda_kde_5.60.5_svm.n5_test
## Accuracy
## -0.0259009
## Bayesian Tests Test set diff
# Bayesian Sign Test
# Applied to the single test-set difference, with the same -0.01 / 0.01 bounds
# used for the fold-wise tests.
bst_tda_kde_5.60.5_svm.n5_test <- BayesianSignTest(
  as.matrix(diff_tda_kde_5.60.5_svm.n5_test),
  -0.01,
  0.01
)
bst_tda_kde_5.60.5_svm.n5_test
## $probLeft
## [1] 0.5
##
## $probRope
## [1] 0.5
##
## $probRight
## [1] 0
# Odds Left Bayesian Sign Test
# Left probability divided by right probability; the right probability is 0
# for this comparison, so the ratio evaluates to Inf.
bst_tda_kde_5.60.5_svm.n5_test_odds.left <- with(
  bst_tda_kde_5.60.5_svm.n5_test,
  probLeft / probRight
)
bst_tda_kde_5.60.5_svm.n5_test_odds.left
## [1] Inf
# Bayesian Signed Rank Test
# Signed-rank variant on the single test-set difference, same bounds as above.
bsr_tda_kde_5.60.5_svm.n5_test <- BayesianSignedRank(
  as.matrix(diff_tda_kde_5.60.5_svm.n5_test),
  -0.01,
  0.01
)
bsr_tda_kde_5.60.5_svm.n5_test
## $winLeft
## [1] 0.84
##
## $winRope
## [1] 0.16
##
## $winRight
## [1] 0
# Bayesian Correlated Test
# The input holds a single difference, and the call returns NA for left, rope
# and right (see the output below).
# NOTE(review): presumably a spread cannot be estimated from one observation --
# consider dropping this call for the test-set comparison.
bct_tda_kde_5.60.5_svm.n5_test <- correlatedBayesianTtest(
  as.matrix(diff_tda_kde_5.60.5_svm.n5_test),
  0.1,
  -0.01,
  0.01
)
bct_tda_kde_5.60.5_svm.n5_test
## $left
## [1] NA
##
## $rope
## [1] NA
##
## $right
## [1] NA
# Rope Plot
# The remaining test-set analyses are left disabled; the test-set difference is
# a single value.
#plot(rope(diff_tda_kde_5.60.5_svm.n5_test))
#BayesFactor
#bf_tda_kde_5.60.5_svm.n5_test = ttestBF(x = as.matrix(diff_tda_kde_5.60.5_svm.n5_test))
#bf_tda_kde_5.60.5_svm.n5_test
#t_test
#t.test(as.matrix(diff_tda_kde_5.60.5_svm.n5_test))
#Non-TDA-Assisted
# Print the neural-network fit trained on the full one-hot training data:
# resampling setup, accuracy/kappa per (size, decay) candidate, selected values.
adultNn1Fit
## Neural Network
##
## 22793 samples
## 108 predictor
## 2 classes: ' <=50K', ' >50K'
##
## No pre-processing
## Resampling: Cross-Validated (3 fold)
## Summary of sample sizes: 15195, 15195, 15196
## Resampling results across tuning parameters:
##
## size decay Accuracy Kappa
## 2 0.3 0.7722979 0.07902348
## 2 0.5 0.7902428 0.26366959
## 2 0.7 0.8062120 0.40187788
## 3 0.3 0.7841870 0.15337122
## 3 0.5 0.7984471 0.28576293
## 3 0.7 0.8198126 0.48834476
## 5 0.3 0.7905932 0.22345924
## 5 0.5 0.8086252 0.31627472
## 5 0.7 0.7958584 0.26765852
## 7 0.3 0.8067837 0.33051335
## 7 0.5 0.7998950 0.26059533
## 7 0.7 0.7989735 0.25366012
##
## Accuracy was used to select the optimal model using the largest value.
## The final values used for the model were size = 3 and decay = 0.7.
# Per-fold accuracy and kappa of the selected network (one row per CV fold).
adultNn1Fit$resample
## Accuracy Kappa Resample
## 1 0.8079505 0.4185547 Fold3
## 2 0.8500921 0.5902927 Fold2
## 3 0.8013951 0.4561869 Fold1
# Keep only the per-fold Accuracy column (a one-column data frame).
# NOTE(review): rows are in caret's resample order (Fold3, Fold2, Fold1 in the
# printout above), not sorted by fold -- confirm the ordering matches any model
# these accuracies are later paired with row by row.
ad_nn1_fit_re <- adultNn1Fit$resample["Accuracy"]
# Prints the final network's architecture, fitting options and all weights.
summary(adultNn1Fit)
## a 108-3-1 network with 331 weights
## options were - entropy fitting decay=0.7
## b->h1 i1->h1 i2->h1 i3->h1 i4->h1 i5->h1 i6->h1 i7->h1
## -2.07 -0.04 -0.47 -1.02 -1.34 0.01 -0.01 1.60
## i8->h1 i9->h1 i10->h1 i11->h1 i12->h1 i13->h1 i14->h1 i15->h1
## -0.03 -0.74 -0.07 0.00 -0.48 0.37 -0.02 -0.14
## i16->h1 i17->h1 i18->h1 i19->h1 i20->h1 i21->h1 i22->h1 i23->h1
## 0.15 0.37 -0.40 -0.93 -1.26 -5.00 1.95 0.87
## i24->h1 i25->h1 i26->h1 i27->h1 i28->h1 i29->h1 i30->h1 i31->h1
## 4.25 -0.50 1.23 -2.50 0.45 -1.88 0.28 1.58
## i32->h1 i33->h1 i34->h1 i35->h1 i36->h1 i37->h1 i38->h1 i39->h1
## -1.04 0.07 -0.76 -0.31 -0.45 -0.10 0.14 -1.23
## i40->h1 i41->h1 i42->h1 i43->h1 i44->h1 i45->h1 i46->h1 i47->h1
## 0.91 -0.61 -0.71 -0.03 -1.45 -0.26 2.56 0.11
## i48->h1 i49->h1 i50->h1 i51->h1 i52->h1 i53->h1 i54->h1 i55->h1
## -0.84 -0.55 0.43 -2.41 -2.14 0.25 -2.01 -0.35
## i56->h1 i57->h1 i58->h1 i59->h1 i60->h1 i61->h1 i62->h1 i63->h1
## 4.61 -1.14 -0.96 0.82 -0.12 -0.67 0.03 -2.09
## i64->h1 i65->h1 i66->h1 i67->h1 i68->h1 i69->h1 i70->h1 i71->h1
## 0.00 0.00 0.05 -0.35 -0.57 -0.09 -0.24 -0.06
## i72->h1 i73->h1 i74->h1 i75->h1 i76->h1 i77->h1 i78->h1 i79->h1
## 0.02 -0.19 0.02 -0.03 0.05 0.18 -0.12 -0.07
## i80->h1 i81->h1 i82->h1 i83->h1 i84->h1 i85->h1 i86->h1 i87->h1
## -0.06 0.01 0.00 0.06 -0.17 -0.02 0.33 0.27
## i88->h1 i89->h1 i90->h1 i91->h1 i92->h1 i93->h1 i94->h1 i95->h1
## -0.01 -0.14 0.03 0.50 0.09 0.03 -0.01 0.01
## i96->h1 i97->h1 i98->h1 i99->h1 i100->h1 i101->h1 i102->h1 i103->h1
## 0.08 -0.84 0.05 0.11 -0.30 0.03 -0.25 0.07
## i104->h1 i105->h1 i106->h1 i107->h1 i108->h1
## 0.01 -0.02 -0.45 -0.06 0.01
## b->h2 i1->h2 i2->h2 i3->h2 i4->h2 i5->h2 i6->h2 i7->h2
## 0.00 0.01 0.00 0.00 0.00 0.00 0.00 0.00
## i8->h2 i9->h2 i10->h2 i11->h2 i12->h2 i13->h2 i14->h2 i15->h2
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i16->h2 i17->h2 i18->h2 i19->h2 i20->h2 i21->h2 i22->h2 i23->h2
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i24->h2 i25->h2 i26->h2 i27->h2 i28->h2 i29->h2 i30->h2 i31->h2
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i32->h2 i33->h2 i34->h2 i35->h2 i36->h2 i37->h2 i38->h2 i39->h2
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i40->h2 i41->h2 i42->h2 i43->h2 i44->h2 i45->h2 i46->h2 i47->h2
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i48->h2 i49->h2 i50->h2 i51->h2 i52->h2 i53->h2 i54->h2 i55->h2
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i56->h2 i57->h2 i58->h2 i59->h2 i60->h2 i61->h2 i62->h2 i63->h2
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i64->h2 i65->h2 i66->h2 i67->h2 i68->h2 i69->h2 i70->h2 i71->h2
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i72->h2 i73->h2 i74->h2 i75->h2 i76->h2 i77->h2 i78->h2 i79->h2
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i80->h2 i81->h2 i82->h2 i83->h2 i84->h2 i85->h2 i86->h2 i87->h2
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i88->h2 i89->h2 i90->h2 i91->h2 i92->h2 i93->h2 i94->h2 i95->h2
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i96->h2 i97->h2 i98->h2 i99->h2 i100->h2 i101->h2 i102->h2 i103->h2
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i104->h2 i105->h2 i106->h2 i107->h2 i108->h2
## 0.00 0.00 0.00 0.00 0.00
## b->h3 i1->h3 i2->h3 i3->h3 i4->h3 i5->h3 i6->h3 i7->h3
## 1.81 0.07 0.17 -0.60 0.17 0.00 -0.36 -0.36
## i8->h3 i9->h3 i10->h3 i11->h3 i12->h3 i13->h3 i14->h3 i15->h3
## 2.05 0.74 0.00 0.00 0.64 0.56 0.01 -0.05
## i16->h3 i17->h3 i18->h3 i19->h3 i20->h3 i21->h3 i22->h3 i23->h3
## -0.47 1.23 1.10 -0.69 0.55 -2.79 0.46 -0.52
## i24->h3 i25->h3 i26->h3 i27->h3 i28->h3 i29->h3 i30->h3 i31->h3
## 0.50 0.15 0.14 0.96 0.01 -0.09 0.00 -1.25
## i32->h3 i33->h3 i34->h3 i35->h3 i36->h3 i37->h3 i38->h3 i39->h3
## 0.12 1.80 -0.10 1.34 0.17 -1.53 0.00 -0.62
## i40->h3 i41->h3 i42->h3 i43->h3 i44->h3 i45->h3 i46->h3 i47->h3
## -0.40 2.08 -0.64 -0.75 1.54 -0.01 -0.38 0.99
## i48->h3 i49->h3 i50->h3 i51->h3 i52->h3 i53->h3 i54->h3 i55->h3
## 0.68 -0.30 0.99 -0.85 -0.59 1.27 0.41 1.75
## i56->h3 i57->h3 i58->h3 i59->h3 i60->h3 i61->h3 i62->h3 i63->h3
## -0.17 -0.37 0.32 -0.32 0.14 2.04 1.36 0.45
## i64->h3 i65->h3 i66->h3 i67->h3 i68->h3 i69->h3 i70->h3 i71->h3
## 0.00 0.00 -0.07 -0.01 0.10 0.01 -0.05 0.09
## i72->h3 i73->h3 i74->h3 i75->h3 i76->h3 i77->h3 i78->h3 i79->h3
## 0.34 0.00 0.00 0.93 0.24 -0.04 0.02 0.45
## i80->h3 i81->h3 i82->h3 i83->h3 i84->h3 i85->h3 i86->h3 i87->h3
## 0.01 0.00 0.00 0.00 0.00 -0.29 0.04 0.02
## i88->h3 i89->h3 i90->h3 i91->h3 i92->h3 i93->h3 i94->h3 i95->h3
## 0.00 -0.16 0.01 -0.04 0.00 -0.51 0.11 0.00
## i96->h3 i97->h3 i98->h3 i99->h3 i100->h3 i101->h3 i102->h3 i103->h3
## 0.00 0.01 0.03 0.06 -0.04 -0.06 0.05 -0.01
## i104->h3 i105->h3 i106->h3 i107->h3 i108->h3
## 0.00 -0.01 0.44 0.23 -0.19
## b->o h1->o h2->o h3->o
## 1.26 1.28 1.27 -4.15
# Variable-importance plot for the non-TDA network, limited to 25 features.
# The plot title previously read "Assited"; the spelling is corrected here.
vip(adultNn1Fit, 25) + ggtitle("non-TDA-Assisted NN")

# Predict outcome using model from training data based on testing data
predictions <- predict(adultNn1Fit, newdata = adult.one_hot_df4Test)
# Create confusion matrix to assess model fit/performance on test data
nn1_cf <- confusionMatrix(
  data = predictions,
  reference = as.factor(adult.one_hot_df4Test$adult_df1)
)
nn1_cf
## Confusion Matrix and Statistics
##
## Reference
## Prediction <=50K >50K
## <=50K 6956 1444
## >50K 460 908
##
## Accuracy : 0.8051
## 95% CI : (0.7971, 0.8129)
## No Information Rate : 0.7592
## P-Value [Acc > NIR] : < 2.2e-16
##
## Kappa : 0.378
##
## Mcnemar's Test P-Value : < 2.2e-16
##
## Sensitivity : 0.9380
## Specificity : 0.3861
## Pos Pred Value : 0.8281
## Neg Pred Value : 0.6637
## Prevalence : 0.7592
## Detection Rate : 0.7121
## Detection Prevalence : 0.8600
## Balanced Accuracy : 0.6620
##
## 'Positive' Class : <=50K
##
# Overall test-set statistics of the non-TDA network as a named numeric vector.
nn1_cf$overall
## Accuracy Kappa AccuracyLower AccuracyUpper AccuracyNull
## 8.050778e-01 3.780235e-01 7.970803e-01 8.128930e-01 7.592138e-01
## AccuracyPValue McnemarPValue
## 1.086498e-27 2.213692e-112
# Store the test-set accuracy (first element of $overall, named "Accuracy").
nn1_cf_ov_acc <- nn1_cf$overall["Accuracy"]
# Per-class statistics, computed with ' <=50K' as the positive class.
nn1_cf$byClass
## Sensitivity Specificity Pos Pred Value
## 0.9379720 0.3860544 0.8280952
## Neg Pred Value Precision Recall
## 0.6637427 0.8280952 0.9379720
## F1 Prevalence Detection Rate
## 0.8796156 0.7592138 0.7121212
## Detection Prevalence Balanced Accuracy
## 0.8599509 0.6620132
# Keep precision, recall and F1 (elements 5 to 7 of $byClass).
nn1_cf_pre_rec_f1 <- nn1_cf$byClass[c("Precision", "Recall", "F1")]
##With TDA PCA filter 5 intervals, 50% overlap, 5 bins
# NOTE(review): the object names in this section carry "5.60.5" -- confirm
# whether the overlap is 50% or 60%.
##Node1
#Neural Network 1
# Fit the same nnet model and tuning grid on the rows of TDA node 1 only.
# `Importance` is spelled out as TRUE rather than T, because T is an ordinary
# variable that can be reassigned.
Adult_TDA_PC_5.60.5_n1_NN1Fit0 <- train(
  as.factor(adult_df1) ~ .,
  data = tda.m_adult_5.60.5.n1.vec,
  Importance = TRUE,
  method = "nnet",
  trControl = fitControl,
  tuneGrid = nn1Grid,
  metric = "Accuracy"
)
## # weights: 221
## initial value 4824.713668
## iter 10 value 1492.873618
## iter 20 value 1480.412836
## iter 30 value 1392.296209
## iter 40 value 1219.014250
## iter 50 value 1095.672688
## iter 60 value 1039.633241
## iter 70 value 1013.658978
## iter 80 value 1009.479886
## iter 90 value 1008.325876
## iter 100 value 1008.289157
## final value 1008.289157
## stopped after 100 iterations
## # weights: 331
## initial value 2097.492037
## iter 10 value 1471.597256
## iter 20 value 1457.429380
## iter 30 value 1427.590351
## iter 40 value 1287.840868
## iter 50 value 1071.950320
## iter 60 value 1032.493835
## iter 70 value 1017.112612
## iter 80 value 1010.874037
## iter 90 value 1001.307134
## iter 100 value 998.421712
## final value 998.421712
## stopped after 100 iterations
## # weights: 551
## initial value 3145.588679
## iter 10 value 1466.903187
## iter 20 value 1428.358585
## iter 30 value 1320.702943
## iter 40 value 1198.302588
## iter 50 value 1085.634314
## iter 60 value 1066.962941
## iter 70 value 1027.297321
## iter 80 value 1014.696459
## iter 90 value 1014.536065
## iter 100 value 1013.101421
## final value 1013.101421
## stopped after 100 iterations
## # weights: 771
## initial value 2956.220491
## iter 10 value 1467.772897
## iter 20 value 1426.683725
## iter 30 value 1408.278015
## iter 40 value 1390.349667
## iter 50 value 1364.575860
## iter 60 value 1252.930283
## iter 70 value 1144.571149
## iter 80 value 1076.136885
## iter 90 value 1043.144819
## iter 100 value 1019.657711
## final value 1019.657711
## stopped after 100 iterations
## # weights: 221
## initial value 2657.593141
## iter 10 value 1472.079642
## final value 1472.078881
## converged
## # weights: 331
## initial value 3466.925526
## iter 10 value 1474.851869
## iter 20 value 1471.733710
## iter 30 value 1471.695002
## final value 1471.694950
## converged
## # weights: 551
## initial value 3444.429762
## iter 10 value 1995.433601
## iter 20 value 1480.007340
## iter 30 value 1465.879996
## iter 40 value 1414.249250
## iter 50 value 1409.559954
## iter 60 value 1407.939471
## iter 70 value 1402.098314
## iter 80 value 1397.209534
## iter 90 value 1369.548414
## iter 100 value 1344.980991
## final value 1344.980991
## stopped after 100 iterations
## # weights: 771
## initial value 2497.128925
## iter 10 value 1473.299176
## iter 20 value 1471.459449
## iter 30 value 1460.491732
## iter 40 value 1432.422786
## iter 50 value 1409.088279
## iter 60 value 1345.969201
## iter 70 value 1283.306298
## iter 80 value 1193.215457
## iter 90 value 1150.741027
## iter 100 value 1149.410801
## final value 1149.410801
## stopped after 100 iterations
## # weights: 221
## initial value 3368.768004
## iter 10 value 1479.536070
## iter 20 value 1448.650168
## iter 30 value 1398.488885
## iter 40 value 1382.544419
## iter 50 value 1332.849218
## iter 60 value 1165.153945
## iter 70 value 1082.990815
## iter 80 value 1056.947686
## iter 90 value 1051.644462
## iter 100 value 1044.748696
## final value 1044.748696
## stopped after 100 iterations
## # weights: 331
## initial value 3684.101493
## iter 10 value 1452.510135
## iter 20 value 1437.013993
## iter 30 value 1433.636025
## iter 40 value 1431.897675
## iter 50 value 1413.003762
## iter 60 value 1387.954217
## iter 70 value 1373.875016
## iter 80 value 1313.785145
## iter 90 value 1265.327991
## iter 100 value 1129.258050
## final value 1129.258050
## stopped after 100 iterations
## # weights: 551
## initial value 2365.301084
## iter 10 value 1474.325956
## iter 20 value 1443.563278
## iter 30 value 1442.059548
## iter 40 value 1433.692983
## iter 50 value 1424.342201
## iter 60 value 1395.656566
## iter 70 value 1390.070956
## iter 80 value 1385.076714
## iter 90 value 1364.487179
## iter 100 value 1233.308913
## final value 1233.308913
## stopped after 100 iterations
## # weights: 771
## initial value 3426.458156
## iter 10 value 1472.220984
## iter 20 value 1456.741820
## iter 30 value 1443.979227
## iter 40 value 1431.542531
## iter 50 value 1419.061699
## iter 60 value 1358.578517
## iter 70 value 1332.310274
## iter 80 value 1326.129798
## iter 90 value 1325.363576
## iter 100 value 1320.901745
## final value 1320.901745
## stopped after 100 iterations
## # weights: 221
## initial value 2636.928893
## iter 10 value 1468.259419
## iter 20 value 1457.601250
## iter 30 value 1281.943845
## iter 40 value 1151.232140
## iter 50 value 1079.561635
## iter 60 value 1057.953489
## iter 70 value 1047.326317
## iter 80 value 1045.161374
## iter 90 value 1043.685850
## iter 100 value 1043.652747
## final value 1043.652747
## stopped after 100 iterations
## # weights: 331
## initial value 2933.924321
## iter 10 value 1453.424804
## iter 20 value 1436.482981
## iter 30 value 1422.411935
## iter 40 value 1415.306331
## iter 50 value 1395.736239
## iter 60 value 1373.996481
## iter 70 value 1342.598418
## iter 80 value 1329.938753
## iter 90 value 1257.036356
## iter 100 value 1027.240686
## final value 1027.240686
## stopped after 100 iterations
## # weights: 551
## initial value 5102.206419
## iter 10 value 1620.946522
## iter 20 value 1462.233398
## iter 30 value 1452.253226
## iter 40 value 1436.868651
## iter 50 value 1419.646632
## iter 60 value 1374.499720
## iter 70 value 1364.272022
## iter 80 value 1333.275450
## iter 90 value 1273.713055
## iter 100 value 1200.037748
## final value 1200.037748
## stopped after 100 iterations
## # weights: 771
## initial value 4176.782620
## iter 10 value 1473.601787
## iter 20 value 1444.673774
## iter 30 value 1435.914901
## iter 40 value 1422.676316
## iter 50 value 1376.124151
## iter 60 value 1330.421199
## iter 70 value 1326.072765
## iter 80 value 1314.812803
## iter 90 value 1236.826179
## iter 100 value 1221.373137
## final value 1221.373137
## stopped after 100 iterations
## # weights: 221
## initial value 2398.900929
## iter 10 value 1439.719491
## iter 20 value 1432.069452
## iter 30 value 1430.355364
## iter 40 value 1416.393778
## iter 50 value 1397.143797
## iter 60 value 1393.493922
## final value 1393.226234
## converged
## # weights: 331
## initial value 4784.442827
## iter 10 value 1471.613920
## iter 20 value 1464.579482
## iter 30 value 1459.006499
## iter 40 value 1425.663469
## iter 50 value 1402.697636
## iter 60 value 1401.960954
## iter 70 value 1392.498789
## iter 80 value 1370.999354
## iter 90 value 1343.824223
## iter 100 value 1210.444177
## final value 1210.444177
## stopped after 100 iterations
## # weights: 551
## initial value 4244.839569
## iter 10 value 1471.585273
## iter 20 value 1462.049917
## iter 30 value 1424.398076
## iter 40 value 1409.093392
## iter 50 value 1401.481485
## iter 60 value 1392.838454
## iter 70 value 1378.989459
## iter 80 value 1377.123927
## iter 90 value 1376.139706
## iter 100 value 1375.273429
## final value 1375.273429
## stopped after 100 iterations
## # weights: 771
## initial value 2047.729453
## iter 10 value 1473.583468
## iter 20 value 1456.392547
## iter 30 value 1423.232831
## iter 40 value 1414.543663
## iter 50 value 1412.257873
## iter 60 value 1409.596927
## iter 70 value 1388.033894
## iter 80 value 1382.532539
## iter 90 value 1374.519560
## iter 100 value 1344.064404
## final value 1344.064404
## stopped after 100 iterations
## # weights: 221
## initial value 6686.092826
## iter 10 value 1668.962278
## iter 20 value 1516.548295
## iter 30 value 1443.822664
## iter 40 value 1426.711792
## iter 50 value 1399.522807
## iter 60 value 1398.867336
## iter 70 value 1397.036245
## iter 80 value 1386.393067
## iter 90 value 1320.816627
## iter 100 value 1221.568781
## final value 1221.568781
## stopped after 100 iterations
## # weights: 331
## initial value 3014.616130
## iter 10 value 1474.872398
## iter 20 value 1446.158806
## iter 30 value 1360.836601
## iter 40 value 1190.528729
## iter 50 value 1117.877457
## iter 60 value 1084.625007
## iter 70 value 1076.710308
## iter 80 value 1074.963430
## iter 90 value 1074.881126
## iter 100 value 1074.512192
## final value 1074.512192
## stopped after 100 iterations
## # weights: 551
## initial value 6717.327313
## iter 10 value 1673.648481
## iter 20 value 1651.254576
## iter 30 value 1445.438888
## iter 40 value 1445.200087
## iter 50 value 1439.782757
## iter 60 value 1429.063333
## iter 70 value 1405.977708
## iter 80 value 1391.384082
## iter 90 value 1315.214495
## iter 100 value 1243.699500
## final value 1243.699500
## stopped after 100 iterations
## # weights: 771
## initial value 2965.067855
## iter 10 value 1473.097898
## iter 20 value 1443.169315
## iter 30 value 1433.986250
## iter 40 value 1426.203150
## iter 50 value 1418.231668
## iter 60 value 1394.675980
## iter 70 value 1391.734802
## iter 80 value 1350.793840
## iter 90 value 1282.407739
## iter 100 value 1158.320625
## final value 1158.320625
## stopped after 100 iterations
## # weights: 221
## initial value 6390.504817
## iter 10 value 1477.769597
## iter 20 value 1464.837523
## iter 30 value 1442.096311
## iter 40 value 1420.459299
## iter 50 value 1396.465551
## iter 60 value 1230.529287
## iter 70 value 1169.601150
## iter 80 value 1126.646573
## iter 90 value 1084.043140
## iter 100 value 1001.464921
## final value 1001.464921
## stopped after 100 iterations
## # weights: 331
## initial value 3361.124445
## iter 10 value 1459.985739
## iter 20 value 1429.268291
## iter 30 value 1328.552884
## iter 40 value 1070.713798
## iter 50 value 1028.625594
## iter 60 value 1026.706393
## final value 1026.625424
## converged
## # weights: 551
## initial value 4761.323456
## iter 10 value 1455.260056
## iter 20 value 1437.605407
## iter 30 value 1423.793227
## iter 40 value 1330.242099
## iter 50 value 1225.524969
## iter 60 value 1204.906870
## iter 70 value 1155.498214
## iter 80 value 1059.054605
## iter 90 value 1039.869380
## iter 100 value 1023.465603
## final value 1023.465603
## stopped after 100 iterations
## # weights: 771
## initial value 1669.619865
## iter 10 value 1451.850212
## iter 20 value 1435.723319
## iter 30 value 1406.557746
## iter 40 value 1392.190299
## iter 50 value 1374.095793
## iter 60 value 1301.758400
## iter 70 value 1274.526920
## iter 80 value 1149.066283
## iter 90 value 1088.825158
## iter 100 value 1054.467401
## final value 1054.467401
## stopped after 100 iterations
## # weights: 221
## initial value 4804.295432
## iter 10 value 1472.756827
## iter 20 value 1472.110176
## iter 30 value 1472.044447
## iter 40 value 1466.089087
## iter 50 value 1410.754282
## iter 60 value 1396.222472
## iter 70 value 1350.890558
## iter 80 value 1246.561903
## iter 90 value 1147.473740
## iter 100 value 1140.339777
## final value 1140.339777
## stopped after 100 iterations
## # weights: 331
## initial value 3298.120749
## iter 10 value 1472.326209
## iter 20 value 1458.558007
## iter 30 value 1419.509844
## iter 40 value 1323.616529
## iter 50 value 1171.115521
## iter 60 value 1060.869627
## iter 70 value 1025.910988
## iter 80 value 1013.487355
## iter 90 value 1011.593944
## iter 100 value 1011.268930
## final value 1011.268930
## stopped after 100 iterations
## # weights: 551
## initial value 5866.677144
## iter 10 value 1461.812922
## iter 20 value 1457.246562
## iter 30 value 1429.547463
## iter 40 value 1295.832536
## iter 50 value 1178.216810
## iter 60 value 1143.135307
## iter 70 value 1129.080896
## iter 80 value 1122.959905
## iter 90 value 1097.501312
## iter 100 value 1010.708250
## final value 1010.708250
## stopped after 100 iterations
## # weights: 771
## initial value 3071.871631
## iter 10 value 1466.441731
## iter 20 value 1438.316847
## iter 30 value 1432.286345
## iter 40 value 1395.781508
## iter 50 value 1285.514639
## iter 60 value 1175.359101
## iter 70 value 1152.644601
## iter 80 value 1147.918115
## iter 90 value 1107.407902
## iter 100 value 1036.355254
## final value 1036.355254
## stopped after 100 iterations
## # weights: 221
## initial value 2593.364767
## iter 10 value 1498.988768
## iter 20 value 1472.911549
## iter 30 value 1441.064739
## iter 40 value 1426.524821
## iter 50 value 1398.359390
## iter 60 value 1390.766745
## iter 70 value 1378.018705
## iter 80 value 1367.441075
## iter 90 value 1351.338568
## iter 100 value 1300.389742
## final value 1300.389742
## stopped after 100 iterations
## # weights: 331
## initial value 2449.457775
## iter 10 value 1468.059192
## iter 20 value 1434.573716
## iter 30 value 1339.907100
## iter 40 value 1189.837536
## iter 50 value 1113.220989
## iter 60 value 1046.688534
## iter 70 value 1035.780491
## iter 80 value 1029.641756
## iter 90 value 1021.614939
## iter 100 value 1020.642055
## final value 1020.642055
## stopped after 100 iterations
## # weights: 551
## initial value 5223.585908
## iter 10 value 1476.340975
## iter 20 value 1471.943390
## iter 30 value 1458.884468
## iter 40 value 1449.623659
## iter 50 value 1416.939740
## iter 60 value 1405.551820
## iter 70 value 1388.484469
## iter 80 value 1318.078308
## iter 90 value 1170.539861
## iter 100 value 1101.509198
## final value 1101.509198
## stopped after 100 iterations
## # weights: 771
## initial value 2598.410600
## iter 10 value 1471.674802
## iter 20 value 1459.576226
## iter 30 value 1421.208839
## iter 40 value 1407.012400
## iter 50 value 1396.880802
## iter 60 value 1378.316513
## iter 70 value 1322.624729
## iter 80 value 1302.609627
## iter 90 value 1268.462702
## iter 100 value 1122.224023
## final value 1122.224023
## stopped after 100 iterations
## # weights: 331
## initial value 6220.374208
## iter 10 value 2207.449829
## iter 20 value 2171.396919
## iter 30 value 2145.880105
## iter 40 value 2142.914904
## iter 50 value 2142.521786
## iter 60 value 2107.920068
## iter 70 value 2096.600242
## iter 80 value 2089.615411
## iter 90 value 2050.313852
## iter 100 value 1882.385342
## final value 1882.385342
## stopped after 100 iterations
# Show the resampling results and the selected tuning values for node 1.
print(Adult_TDA_PC_5.60.5_n1_NN1Fit0)
## Neural Network
##
## 6560 samples
## 108 predictor
## 2 classes: ' <=50K', ' >50K'
##
## No pre-processing
## Resampling: Cross-Validated (3 fold)
## Summary of sample sizes: 4374, 4373, 4373
## Resampling results across tuning parameters:
##
## size decay Accuracy Kappa
## 2 0.3 0.8984758 0.15562165
## 2 0.5 0.8948171 0.00000000
## 2 0.7 0.8964944 0.04998668
## 3 0.3 0.9009151 0.19970302
## 3 0.5 0.8983226 0.18479662
## 3 0.7 0.8955791 0.11492999
## 5 0.3 0.8971041 0.12616547
## 5 0.5 0.8966461 0.10052084
## 5 0.7 0.8948171 0.00000000
## 7 0.3 0.8932930 0.24529946
## 7 0.5 0.8948171 0.00000000
## 7 0.7 0.8948171 0.00000000
##
## Accuracy was used to select the optimal model using the largest value.
## The final values used for the model were size = 3 and decay = 0.3.
# Per-fold accuracy and kappa of the selected node-1 model.
print(Adult_TDA_PC_5.60.5_n1_NN1Fit0[["resample"]])
## Accuracy Kappa Resample
## 1 0.8948331 0.0000000 Fold3
## 2 0.9039781 0.3271932 Fold2
## 3 0.9039341 0.2719158 Fold1
# Keep the per-fold accuracies (first column) as a one-column data frame.
ad_tda_pc_5.60.5_n1_nn1_fit_re <-
  Adult_TDA_PC_5.60.5_n1_NN1Fit0[["resample"]]["Accuracy"]
# Print the architecture and weights of the fitted network.
summary(Adult_TDA_PC_5.60.5_n1_NN1Fit0)
## a 108-3-1 network with 331 weights
## options were - entropy fitting decay=0.3
## b->h1 i1->h1 i2->h1 i3->h1 i4->h1 i5->h1 i6->h1 i7->h1
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i8->h1 i9->h1 i10->h1 i11->h1 i12->h1 i13->h1 i14->h1 i15->h1
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i16->h1 i17->h1 i18->h1 i19->h1 i20->h1 i21->h1 i22->h1 i23->h1
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i24->h1 i25->h1 i26->h1 i27->h1 i28->h1 i29->h1 i30->h1 i31->h1
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i32->h1 i33->h1 i34->h1 i35->h1 i36->h1 i37->h1 i38->h1 i39->h1
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i40->h1 i41->h1 i42->h1 i43->h1 i44->h1 i45->h1 i46->h1 i47->h1
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i48->h1 i49->h1 i50->h1 i51->h1 i52->h1 i53->h1 i54->h1 i55->h1
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i56->h1 i57->h1 i58->h1 i59->h1 i60->h1 i61->h1 i62->h1 i63->h1
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i64->h1 i65->h1 i66->h1 i67->h1 i68->h1 i69->h1 i70->h1 i71->h1
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i72->h1 i73->h1 i74->h1 i75->h1 i76->h1 i77->h1 i78->h1 i79->h1
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i80->h1 i81->h1 i82->h1 i83->h1 i84->h1 i85->h1 i86->h1 i87->h1
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i88->h1 i89->h1 i90->h1 i91->h1 i92->h1 i93->h1 i94->h1 i95->h1
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i96->h1 i97->h1 i98->h1 i99->h1 i100->h1 i101->h1 i102->h1 i103->h1
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i104->h1 i105->h1 i106->h1 i107->h1 i108->h1
## 0.00 0.00 0.00 0.00 0.00
## b->h2 i1->h2 i2->h2 i3->h2 i4->h2 i5->h2 i6->h2 i7->h2
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i8->h2 i9->h2 i10->h2 i11->h2 i12->h2 i13->h2 i14->h2 i15->h2
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i16->h2 i17->h2 i18->h2 i19->h2 i20->h2 i21->h2 i22->h2 i23->h2
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i24->h2 i25->h2 i26->h2 i27->h2 i28->h2 i29->h2 i30->h2 i31->h2
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i32->h2 i33->h2 i34->h2 i35->h2 i36->h2 i37->h2 i38->h2 i39->h2
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i40->h2 i41->h2 i42->h2 i43->h2 i44->h2 i45->h2 i46->h2 i47->h2
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i48->h2 i49->h2 i50->h2 i51->h2 i52->h2 i53->h2 i54->h2 i55->h2
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i56->h2 i57->h2 i58->h2 i59->h2 i60->h2 i61->h2 i62->h2 i63->h2
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i64->h2 i65->h2 i66->h2 i67->h2 i68->h2 i69->h2 i70->h2 i71->h2
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i72->h2 i73->h2 i74->h2 i75->h2 i76->h2 i77->h2 i78->h2 i79->h2
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i80->h2 i81->h2 i82->h2 i83->h2 i84->h2 i85->h2 i86->h2 i87->h2
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i88->h2 i89->h2 i90->h2 i91->h2 i92->h2 i93->h2 i94->h2 i95->h2
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i96->h2 i97->h2 i98->h2 i99->h2 i100->h2 i101->h2 i102->h2 i103->h2
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i104->h2 i105->h2 i106->h2 i107->h2 i108->h2
## 0.00 0.00 0.00 0.00 0.00
## b->h3 i1->h3 i2->h3 i3->h3 i4->h3 i5->h3 i6->h3 i7->h3
## 1.89 -0.21 0.54 -0.23 -0.12 0.00 4.80 -3.15
## i8->h3 i9->h3 i10->h3 i11->h3 i12->h3 i13->h3 i14->h3 i15->h3
## -0.35 0.40 0.00 0.00 0.57 -0.04 0.46 0.01
## i16->h3 i17->h3 i18->h3 i19->h3 i20->h3 i21->h3 i22->h3 i23->h3
## 0.06 0.39 0.05 1.11 -2.41 -0.94 -0.06 2.78
## i24->h3 i25->h3 i26->h3 i27->h3 i28->h3 i29->h3 i30->h3 i31->h3
## -1.73 0.00 -0.02 1.65 -0.48 0.24 -0.03 1.70
## i32->h3 i33->h3 i34->h3 i35->h3 i36->h3 i37->h3 i38->h3 i39->h3
## 0.00 -0.03 0.00 0.01 0.54 1.28 0.04 0.41
## i40->h3 i41->h3 i42->h3 i43->h3 i44->h3 i45->h3 i46->h3 i47->h3
## -2.70 -1.18 0.12 0.83 0.38 0.00 -0.74 1.06
## i48->h3 i49->h3 i50->h3 i51->h3 i52->h3 i53->h3 i54->h3 i55->h3
## 2.01 0.76 -0.93 1.11 0.38 0.14 0.03 -0.02
## i56->h3 i57->h3 i58->h3 i59->h3 i60->h3 i61->h3 i62->h3 i63->h3
## 0.24 0.20 -1.34 1.07 0.15 1.79 0.17 1.71
## i64->h3 i65->h3 i66->h3 i67->h3 i68->h3 i69->h3 i70->h3 i71->h3
## 0.00 0.00 -0.06 0.53 0.19 1.32 0.18 -0.03
## i72->h3 i73->h3 i74->h3 i75->h3 i76->h3 i77->h3 i78->h3 i79->h3
## 0.19 0.00 -0.02 0.04 0.03 -0.07 0.41 -1.25
## i80->h3 i81->h3 i82->h3 i83->h3 i84->h3 i85->h3 i86->h3 i87->h3
## 0.00 0.00 0.00 0.00 0.09 0.13 -1.49 -1.20
## i88->h3 i89->h3 i90->h3 i91->h3 i92->h3 i93->h3 i94->h3 i95->h3
## -0.01 0.02 0.00 0.54 -0.02 0.03 0.00 0.00
## i96->h3 i97->h3 i98->h3 i99->h3 i100->h3 i101->h3 i102->h3 i103->h3
## -0.01 0.01 -0.08 -0.01 0.03 0.00 0.04 0.14
## i104->h3 i105->h3 i106->h3 i107->h3 i108->h3
## 0.00 0.00 2.05 0.06 0.06
## b->o h1->o h2->o h3->o
## 0.44 0.44 0.44 2.48
# Plot the 25 highest-ranked predictors of the node-1 TDA-assisted network.
# The feature count is passed by name and the title typo ("Assited") is fixed.
vip(Adult_TDA_PC_5.60.5_n1_NN1Fit0, num_features = 25) +
  ggtitle("Adult_TDA_PCA_5.60.5_n1_NN1Fit TDA-Assisted NN")

# Score the test frame with the node-1 TDA-assisted network.
pred0 <- predict(Adult_TDA_PC_5.60.5_n1_NN1Fit0, newdata = adult.one_hot_df4Test)
# Cross-tabulate the predictions against the observed labels stored in the
# adult_df1 column of the test frame.
# NOTE(review): in the table printed below every test row is predicted as
# >50K (accuracy 0.2408) -- check that the test frame is encoded and scaled
# like the node-1 training data.
ad_tda_pc_5.60.5_n1_nn1_cf0 <- confusionMatrix(
  data = pred0,
  reference = as.factor(adult.one_hot_df4Test$adult_df1)
)
print(ad_tda_pc_5.60.5_n1_nn1_cf0)
## Confusion Matrix and Statistics
##
## Reference
## Prediction <=50K >50K
## <=50K 0 0
## >50K 7416 2352
##
## Accuracy : 0.2408
## 95% CI : (0.2323, 0.2494)
## No Information Rate : 0.7592
## P-Value [Acc > NIR] : 1
##
## Kappa : 0
##
## Mcnemar's Test P-Value : <2e-16
##
## Sensitivity : 0.0000
## Specificity : 1.0000
## Pos Pred Value : NaN
## Neg Pred Value : 0.2408
## Prevalence : 0.7592
## Detection Rate : 0.0000
## Detection Prevalence : 0.0000
## Balanced Accuracy : 0.5000
##
## 'Positive' Class : <=50K
##
# NOTE(review): second print of the same confusion matrix; the object has not
# changed since the previous print, so the output is identical.
print(ad_tda_pc_5.60.5_n1_nn1_cf0)
## Confusion Matrix and Statistics
##
## Reference
## Prediction <=50K >50K
## <=50K 0 0
## >50K 7416 2352
##
## Accuracy : 0.2408
## 95% CI : (0.2323, 0.2494)
## No Information Rate : 0.7592
## P-Value [Acc > NIR] : 1
##
## Kappa : 0
##
## Mcnemar's Test P-Value : <2e-16
##
## Sensitivity : 0.0000
## Specificity : 1.0000
## Pos Pred Value : NaN
## Neg Pred Value : 0.2408
## Prevalence : 0.7592
## Detection Rate : 0.0000
## Detection Prevalence : 0.0000
## Balanced Accuracy : 0.5000
##
## 'Positive' Class : <=50K
##
# Overall test-set statistics of the node-1 TDA-assisted network.
print(ad_tda_pc_5.60.5_n1_nn1_cf0[["overall"]])
## Accuracy Kappa AccuracyLower AccuracyUpper AccuracyNull
## 0.2407862 0.0000000 0.2323343 0.2493929 0.7592138
## AccuracyPValue McnemarPValue
## 1.0000000 0.0000000
# Keep the accuracy entry (first element of `overall`), selected by name.
ad_tda_pc_5.60.5_n1_nn1_cf0_ov_acc <-
  ad_tda_pc_5.60.5_n1_nn1_cf0[["overall"]]["Accuracy"]
# Per-class statistics of the node-1 TDA-assisted network.
print(ad_tda_pc_5.60.5_n1_nn1_cf0[["byClass"]])
## Sensitivity Specificity Pos Pred Value
## 0.0000000 1.0000000 NaN
## Neg Pred Value Precision Recall
## 0.2407862 NA 0.0000000
## F1 Prevalence Detection Rate
## NA 0.7592138 0.0000000
## Detection Prevalence Balanced Accuracy
## 0.0000000 0.5000000
# Keep precision, recall and F1 (positions 5-7 of `byClass`), selected by name.
# Precision and F1 are NA here, as shown in the byClass printout above.
ad_tda_pc_5.60.5_n1_nn1_cf0_pre_rec_f1 <-
  ad_tda_pc_5.60.5_n1_nn1_cf0[["byClass"]][c("Precision", "Recall", "F1")]
###### Conduct initial Bayesian tests of non-tda-assisted NN vs. tda-assisted NN classifiers
### 3-fold diff
# Per-fold accuracy difference, non-TDA minus node-1 TDA.
# NOTE(review): the two one-column data frames are subtracted row by row;
# confirm both list the folds in the same order.
diff_tda_pca_5.60.5_nn1_n1_3_fold <-
  ad_nn1_fit_re - ad_tda_pc_5.60.5_n1_nn1_fit_re
print(diff_tda_pca_5.60.5_nn1_n1_3_fold)
## Accuracy
## 1 -0.08688260
## 2 -0.05388592
## 3 -0.10253902
## Bayesian Tests 3-fold diff
# Bayesian Sign Test on the per-fold differences.
# -0.01 and 0.01 are presumably the ROPE limits (the result has a probRope
# entry) -- confirm against the helper's definition.
bst_tda_pca_5.60.5_nn1.n1_3_fold <- BayesianSignTest(
  as.matrix(diff_tda_pca_5.60.5_nn1_n1_3_fold),
  -0.01,
  0.01
)
print(bst_tda_pca_5.60.5_nn1.n1_3_fold)
## $probLeft
## [1] 0.75
##
## $probRope
## [1] 0.25
##
## $probRight
## [1] 0
# Odds Left Bayesian Sign Test: probLeft divided by probRight.
# The ratio is Inf when probRight is 0, as in the output below.
bst_tda_pca_5.60.5_nn1.n1_3_fold_odds.left <-
  bst_tda_pca_5.60.5_nn1.n1_3_fold[["probLeft"]] /
  bst_tda_pca_5.60.5_nn1.n1_3_fold[["probRight"]]
print(bst_tda_pca_5.60.5_nn1.n1_3_fold_odds.left)
## [1] Inf
# Bayesian Signed Rank Test on the per-fold differences, called with the same
# -0.01 / 0.01 bounds as the sign test.
bsr_tda_pca_5.60.5_nn1.n1_3_fold <- BayesianSignedRank(
  as.matrix(diff_tda_pca_5.60.5_nn1_n1_3_fold),
  -0.01,
  0.01
)
print(bsr_tda_pca_5.60.5_nn1.n1_3_fold)
## $winLeft
## [1] 0.9906
##
## $winRope
## [1] 0.0094
##
## $winRight
## [1] 0
# Bayesian Correlated Test on the per-fold differences.
# NOTE(review): if the second argument (0.1) is the between-fold correlation,
# it matches 10-fold CV; with 3 folds a value of 1/3 would be expected --
# confirm against the helper's definition.
bct_tda_pca_5.60.5_nn1.n1_3_fold <- correlatedBayesianTtest(
  as.matrix(diff_tda_pca_5.60.5_nn1_n1_3_fold),
  0.1,
  -0.01,
  0.01
)
print(bct_tda_pca_5.60.5_nn1.n1_3_fold)
## $left
## [1] 0.9749097
##
## $rope
## [1] 0.009350311
##
## $right
## [1] 0.01574002
# Rope Plot: ROPE summary of the per-fold differences for the range
# [-0.01, 0.01], passed to plot().
plot(rope(diff_tda_pca_5.60.5_nn1_n1_3_fold, range = c(-0.01, 0.01)))

#BayesFactor (disabled)
#bf_tda_pca_5.60.5_nn1.n1_3_fold = ttestBF(x = as.matrix(diff_tda_pca_5.60.5_nn1_n1_3_fold))
#bf_tda_pca_5.60.5_nn1.n1_3_fold
#t_test
# One-sample t-test of the three per-fold differences against a mean of 0.
t.test(x = as.matrix(diff_tda_pca_5.60.5_nn1_n1_3_fold))
##
## One Sample t-test
##
## data: as.matrix(diff_tda_pca_5.60.5_nn1_n1_3_fold)
## t = -5.656, df = 2, p-value = 0.02987
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
## -0.14279912 -0.01940591
## sample estimates:
## mean of x
## -0.08110251
### Test set diff
# Test-set accuracy difference, non-TDA minus node-1 TDA (a single value).
diff_tda_pca_5.60.5_nn1.n1_test <-
  nn1_cf_ov_acc - ad_tda_pc_5.60.5_n1_nn1_cf0_ov_acc
print(diff_tda_pca_5.60.5_nn1.n1_test)
## Accuracy
## 0.5642916
## Bayesian Tests Test set diff
# Bayesian Sign Test on the test-set difference.
# NOTE(review): the input is a single difference, so the result rests on one
# observation only.
bst_tda_pca_5.60.5_nn1.n1_test <- BayesianSignTest(
  as.matrix(diff_tda_pca_5.60.5_nn1.n1_test),
  -0.01,
  0.01
)
print(bst_tda_pca_5.60.5_nn1.n1_test)
## $probLeft
## [1] 0
##
## $probRope
## [1] 0.5
##
## $probRight
## [1] 0.5
# Odds Left Bayesian Sign Test: probLeft divided by probRight.
bst_tda_pca_5.60.5_nn1.n1_test_odds.left <-
  bst_tda_pca_5.60.5_nn1.n1_test[["probLeft"]] /
  bst_tda_pca_5.60.5_nn1.n1_test[["probRight"]]
print(bst_tda_pca_5.60.5_nn1.n1_test_odds.left)
## [1] 0
# Bayesian Signed Rank Test on the single test-set difference, called with
# the same -0.01 / 0.01 bounds as the sign test.
bsr_tda_pca_5.60.5_nn1.n1_test <- BayesianSignedRank(
  as.matrix(diff_tda_pca_5.60.5_nn1.n1_test),
  -0.01,
  0.01
)
print(bsr_tda_pca_5.60.5_nn1.n1_test)
## $winLeft
## [1] 0
##
## $winRope
## [1] 0.1597667
##
## $winRight
## [1] 0.8402333
# Bayesian Correlated Test on the single test-set difference.
# NOTE(review): left, rope and right all come back NA in the output below,
# presumably because one observation has no sample variance -- treat this
# result as not applicable.
bct_tda_pca_5.60.5_nn1.n1_test <- correlatedBayesianTtest(
  as.matrix(diff_tda_pca_5.60.5_nn1.n1_test),
  0.1,
  -0.01,
  0.01
)
print(bct_tda_pca_5.60.5_nn1.n1_test)
## $left
## [1] NA
##
## $rope
## [1] NA
##
## $right
## [1] NA
# Rope Plot (disabled for the single test-set difference)
#plot(rope(diff_tda_pca_5.60.5_nn1.n1_test))
#BayesFactor (disabled)
#bf_tda_pca_5.60.5_nn1.n1_test = ttestBF(x = as.matrix(diff_tda_pca_5.60.5_nn1.n1_test))
#bf_tda_pca_5.60.5_nn1.n1_test
#t_test (disabled)
#t.test(as.matrix(diff_tda_pca_5.60.5_nn1.n1_test))
##With TDA PCA filter 5 intervals, 50% overlap, 5 bins
# NOTE(review): the object names encode "5.60.5" while the header above says
# 50% overlap -- confirm which overlap was actually used.
##Node2
##Adult_TDA_PC_5.60.5_n2_NN1Fit0 <- nnet(as.factor(adult_df1) ~ ., data = tda.m_adult_5.60.5.n2.vec, size=2, range = 0.6,, type='class')
#Neural Network 1
# Tune an nnet model on the node-2 data with the resampling scheme in
# `fitControl` and the candidate grid in `nn1Grid`; the winner is the
# combination with the highest resampled accuracy.
# The former `Importance = T` argument was removed: it is not an argument of
# train() or nnet(), so it was silently swallowed by `...` and had no effect.
# NOTE(review): the fits in the log below stop at the 100-iteration cap
# without converging; consider passing a larger `maxit` through train().
Adult_TDA_PC_5.60.5_n2_NN1Fit0 <- train(
  as.factor(adult_df1) ~ .,
  data = tda.m_adult_5.60.5.n2.vec,
  method = "nnet",
  trControl = fitControl,
  tuneGrid = nn1Grid,
  metric = "Accuracy"
)
## # weights: 221
## initial value 6450.758066
## iter 10 value 6309.697255
## iter 20 value 6248.242335
## iter 30 value 6196.168761
## iter 40 value 5996.884530
## iter 50 value 5657.588241
## iter 60 value 5298.713682
## iter 70 value 5245.241372
## iter 80 value 5238.941804
## iter 90 value 5214.397612
## iter 100 value 5182.454877
## final value 5182.454877
## stopped after 100 iterations
## # weights: 331
## initial value 6464.083393
## iter 10 value 6414.227813
## iter 20 value 6097.510821
## iter 30 value 6075.806560
## iter 40 value 6070.106615
## iter 50 value 6068.451097
## iter 60 value 6067.254096
## iter 70 value 5983.338486
## iter 80 value 5975.110529
## iter 90 value 5915.888353
## iter 100 value 5882.564742
## final value 5882.564742
## stopped after 100 iterations
## # weights: 551
## initial value 6497.002383
## iter 10 value 6342.322249
## iter 20 value 6320.966750
## iter 30 value 6073.101962
## iter 40 value 5966.906998
## iter 50 value 5886.926247
## iter 60 value 5857.718042
## iter 70 value 5849.295641
## iter 80 value 5846.151842
## iter 90 value 5831.903798
## iter 100 value 5814.817319
## final value 5814.817319
## stopped after 100 iterations
## # weights: 771
## initial value 6663.906464
## iter 10 value 6106.691945
## iter 20 value 6083.096238
## iter 30 value 6079.189633
## iter 40 value 6043.834145
## iter 50 value 5985.489271
## iter 60 value 5971.823402
## iter 70 value 5957.127044
## iter 80 value 5954.335935
## iter 90 value 5949.622641
## iter 100 value 5899.577893
## final value 5899.577893
## stopped after 100 iterations
## # weights: 221
## initial value 6469.576535
## iter 10 value 6408.124724
## iter 20 value 6095.944211
## iter 30 value 6095.222245
## iter 40 value 6085.754226
## iter 50 value 6073.133829
## iter 60 value 6031.848022
## iter 70 value 6012.941050
## iter 80 value 5791.573699
## iter 90 value 5579.078082
## iter 100 value 5513.679054
## final value 5513.679054
## stopped after 100 iterations
## # weights: 331
## initial value 7071.604647
## iter 10 value 6199.429926
## iter 20 value 6107.176904
## iter 30 value 6083.139209
## iter 40 value 6068.958497
## iter 50 value 6027.838000
## iter 60 value 5910.421989
## iter 70 value 5367.200409
## iter 80 value 5247.915977
## iter 90 value 5035.096382
## iter 100 value 4875.075219
## final value 4875.075219
## stopped after 100 iterations
## # weights: 551
## initial value 6797.450204
## iter 10 value 6238.221082
## iter 20 value 6083.042615
## iter 30 value 5970.486123
## iter 40 value 5951.843897
## iter 50 value 5945.713088
## iter 60 value 5932.806739
## iter 70 value 5929.105817
## iter 80 value 5903.284893
## iter 90 value 5879.820730
## iter 100 value 5788.999231
## final value 5788.999231
## stopped after 100 iterations
## # weights: 771
## initial value 6825.407432
## iter 10 value 6444.279893
## iter 20 value 6291.119891
## iter 30 value 6036.976272
## iter 40 value 6020.378858
## iter 50 value 6006.723295
## iter 60 value 5965.784117
## iter 70 value 5925.135141
## iter 80 value 5886.637105
## iter 90 value 5869.776959
## iter 100 value 5847.939806
## final value 5847.939806
## stopped after 100 iterations
## # weights: 221
## initial value 7590.937235
## iter 10 value 6395.389194
## iter 20 value 6059.949797
## iter 30 value 6022.300390
## iter 40 value 6009.308271
## iter 50 value 6006.745075
## iter 60 value 6003.718671
## iter 70 value 5931.252209
## iter 80 value 5706.096137
## iter 90 value 5418.028144
## iter 100 value 5281.933657
## final value 5281.933657
## stopped after 100 iterations
## # weights: 331
## initial value 9481.391710
## iter 10 value 6432.446535
## iter 20 value 6115.958665
## iter 30 value 5996.286711
## iter 40 value 5958.451670
## iter 50 value 5934.514790
## iter 60 value 5928.559269
## iter 70 value 5857.454597
## iter 80 value 5826.449473
## iter 90 value 5755.948928
## iter 100 value 5508.594845
## final value 5508.594845
## stopped after 100 iterations
## # weights: 551
## initial value 7246.565546
## iter 10 value 6394.702654
## iter 20 value 6098.687050
## iter 30 value 6090.530118
## iter 40 value 6074.369959
## iter 50 value 5951.631917
## iter 60 value 5870.390082
## iter 70 value 5806.713153
## iter 80 value 5797.267147
## iter 90 value 5791.938350
## iter 100 value 5783.234809
## final value 5783.234809
## stopped after 100 iterations
## # weights: 771
## initial value 8277.470395
## iter 10 value 6436.501778
## iter 20 value 6097.585757
## iter 30 value 5997.125779
## iter 40 value 5967.964505
## iter 50 value 5908.146318
## iter 60 value 5778.852639
## iter 70 value 5627.741895
## iter 80 value 5203.759599
## iter 90 value 5008.078551
## iter 100 value 4922.781490
## final value 4922.781490
## stopped after 100 iterations
## # weights: 221
## initial value 6925.399932
## iter 10 value 6303.117914
## iter 20 value 6098.541192
## iter 30 value 6096.841079
## iter 40 value 6082.569209
## iter 50 value 5990.770846
## iter 60 value 5950.845740
## iter 70 value 5815.994997
## iter 80 value 5676.894110
## iter 90 value 5476.730729
## iter 100 value 5341.778705
## final value 5341.778705
## stopped after 100 iterations
## # weights: 331
## initial value 7391.644042
## iter 10 value 6381.995409
## iter 20 value 6139.153122
## iter 30 value 6072.266596
## iter 40 value 5965.968694
## iter 50 value 5860.338451
## iter 60 value 5541.473821
## iter 70 value 5165.721634
## iter 80 value 5149.675476
## iter 90 value 5139.466824
## iter 100 value 5097.839933
## final value 5097.839933
## stopped after 100 iterations
## # weights: 551
## initial value 7849.559342
## iter 10 value 6177.728556
## iter 20 value 6070.020672
## iter 30 value 6018.883008
## iter 40 value 5999.198181
## iter 50 value 5995.800890
## iter 60 value 5968.836645
## iter 70 value 5949.764938
## iter 80 value 5928.142121
## iter 90 value 5886.958538
## iter 100 value 5525.389143
## final value 5525.389143
## stopped after 100 iterations
## # weights: 771
## initial value 6618.520897
## iter 10 value 6247.712375
## iter 20 value 6066.320161
## iter 30 value 6050.380936
## iter 40 value 6031.415884
## iter 50 value 5989.421751
## iter 60 value 5986.485814
## iter 70 value 5983.277920
## iter 80 value 5982.758703
## iter 90 value 5982.646340
## iter 100 value 5976.554420
## final value 5976.554420
## stopped after 100 iterations
## # weights: 221
## initial value 6624.386623
## iter 10 value 6352.539039
## iter 20 value 6351.105513
## iter 30 value 6091.494135
## iter 40 value 6087.237195
## iter 50 value 6080.431511
## iter 60 value 5984.655430
## iter 70 value 5979.888775
## iter 80 value 5936.402931
## iter 90 value 5887.083004
## iter 100 value 5803.625187
## final value 5803.625187
## stopped after 100 iterations
## # weights: 331
## initial value 6895.917023
## iter 10 value 6421.541243
## iter 20 value 6337.756038
## iter 30 value 6039.159866
## iter 40 value 5989.582412
## iter 50 value 5961.452230
## iter 60 value 5950.796425
## iter 70 value 5925.505243
## iter 80 value 5885.622689
## iter 90 value 5832.072678
## iter 100 value 5812.019580
## final value 5812.019580
## stopped after 100 iterations
## # weights: 551
## initial value 6476.899030
## iter 10 value 6252.818280
## iter 20 value 6091.410725
## iter 30 value 6050.060853
## iter 40 value 6032.413629
## iter 50 value 5988.622817
## iter 60 value 5952.673875
## iter 70 value 5913.003199
## iter 80 value 5901.609662
## iter 90 value 5889.393522
## iter 100 value 5880.620215
## final value 5880.620215
## stopped after 100 iterations
## # weights: 771
## initial value 6656.295216
## iter 10 value 6433.362740
## iter 20 value 6262.960624
## iter 30 value 6240.940926
## iter 40 value 6118.607473
## iter 50 value 6094.179353
## iter 60 value 5965.476099
## iter 70 value 5940.319212
## iter 80 value 5919.263886
## iter 90 value 5912.172506
## iter 100 value 5892.421706
## final value 5892.421706
## stopped after 100 iterations
## # weights: 221
## initial value 6642.680049
## iter 10 value 6437.105360
## iter 20 value 6435.482249
## iter 30 value 6120.768080
## iter 40 value 6064.079930
## iter 50 value 6033.160039
## iter 60 value 6009.067464
## iter 70 value 5942.317943
## iter 80 value 5936.754862
## iter 90 value 5926.279070
## iter 100 value 5908.055881
## final value 5908.055881
## stopped after 100 iterations
## # weights: 331
## initial value 7288.128127
## iter 10 value 6434.637682
## iter 20 value 6308.756666
## iter 30 value 6095.085952
## iter 40 value 6086.422230
## iter 50 value 6032.806164
## iter 60 value 5980.851270
## iter 70 value 5966.151669
## iter 80 value 5958.776633
## iter 90 value 5935.804513
## iter 100 value 5885.095545
## final value 5885.095545
## stopped after 100 iterations
## # weights: 551
## initial value 7280.024190
## iter 10 value 6335.326059
## iter 20 value 6056.785859
## iter 30 value 6035.410784
## iter 40 value 5963.693831
## iter 50 value 5946.522615
## iter 60 value 5860.359957
## iter 70 value 5745.775932
## iter 80 value 5101.190442
## iter 90 value 4978.885524
## iter 100 value 4969.186569
## final value 4969.186569
## stopped after 100 iterations
## # weights: 771
## initial value 6513.864583
## iter 10 value 6233.480247
## iter 20 value 6033.096419
## iter 30 value 5989.360453
## iter 40 value 5968.854361
## iter 50 value 5947.373119
## iter 60 value 5929.322952
## iter 70 value 5906.118051
## iter 80 value 5874.695975
## iter 90 value 5869.020232
## iter 100 value 5854.020123
## final value 5854.020123
## stopped after 100 iterations
## # weights: 221
## initial value 6759.508796
## iter 10 value 6393.448276
## iter 20 value 6086.022497
## iter 30 value 6082.809885
## iter 40 value 6073.483565
## iter 50 value 6005.288971
## iter 60 value 6000.193054
## iter 70 value 5997.340763
## iter 80 value 5993.426245
## iter 90 value 5986.452621
## iter 100 value 5972.010801
## final value 5972.010801
## stopped after 100 iterations
## # weights: 331
## initial value 6538.759378
## final value 6437.106937
## converged
## # weights: 551
## initial value 8902.006468
## iter 10 value 6372.210642
## iter 20 value 6077.065228
## iter 30 value 6011.501226
## iter 40 value 5951.786579
## iter 50 value 5913.538491
## iter 60 value 5897.675722
## iter 70 value 5675.585555
## iter 80 value 5171.148844
## iter 90 value 5116.136813
## iter 100 value 4879.242661
## final value 4879.242661
## stopped after 100 iterations
## # weights: 771
## initial value 6610.753745
## iter 10 value 6329.226245
## iter 20 value 6322.016688
## iter 30 value 6316.591281
## iter 40 value 6121.947100
## iter 50 value 6098.074794
## iter 60 value 6086.135409
## iter 70 value 6083.416589
## iter 80 value 6077.461012
## iter 90 value 6076.619014
## iter 100 value 6076.140908
## final value 6076.140908
## stopped after 100 iterations
## # weights: 221
## initial value 6865.444058
## final value 6437.106959
## converged
## # weights: 331
## initial value 7678.996080
## iter 10 value 6234.863234
## iter 20 value 6127.758861
## iter 30 value 6081.628029
## iter 40 value 6046.713176
## iter 50 value 6037.762082
## iter 60 value 6035.505474
## iter 70 value 6033.149920
## iter 80 value 6016.839941
## iter 90 value 5988.729390
## iter 100 value 5959.084505
## final value 5959.084505
## stopped after 100 iterations
## # weights: 551
## initial value 6544.462069
## iter 10 value 6348.906466
## iter 20 value 6110.308343
## iter 30 value 5883.450867
## iter 40 value 5507.389633
## iter 50 value 5169.685370
## iter 60 value 5076.601583
## iter 70 value 4972.357492
## iter 80 value 4822.940151
## iter 90 value 4749.223835
## iter 100 value 4735.243882
## final value 4735.243882
## stopped after 100 iterations
## # weights: 771
## initial value 10208.858574
## iter 10 value 6182.653395
## iter 20 value 6128.839466
## iter 30 value 6086.772274
## iter 40 value 6066.033674
## iter 50 value 6061.639103
## iter 60 value 6021.280931
## iter 70 value 5943.638463
## iter 80 value 5672.054013
## iter 90 value 5481.925027
## iter 100 value 5439.811670
## final value 5439.811670
## stopped after 100 iterations
## # weights: 221
## initial value 7670.662511
## iter 10 value 6439.296940
## iter 20 value 6437.141166
## final value 6437.107081
## converged
## # weights: 331
## initial value 6552.235676
## iter 10 value 6437.091320
## iter 10 value 6437.091315
## iter 20 value 6381.209589
## iter 30 value 6121.638836
## iter 40 value 5988.769277
## iter 50 value 5807.606745
## iter 60 value 5452.892825
## iter 70 value 5224.649769
## iter 80 value 4992.962262
## iter 90 value 4879.674316
## iter 100 value 4824.727422
## final value 4824.727422
## stopped after 100 iterations
## # weights: 551
## initial value 6523.176048
## iter 10 value 6433.201890
## iter 20 value 6013.465538
## iter 30 value 5961.475721
## iter 40 value 5951.215937
## iter 50 value 5912.440432
## iter 60 value 5903.120227
## iter 70 value 5872.344436
## iter 80 value 5864.003517
## iter 90 value 5859.021385
## iter 100 value 5843.318663
## final value 5843.318663
## stopped after 100 iterations
## # weights: 771
## initial value 6720.488842
## iter 10 value 6416.407577
## iter 20 value 6111.334930
## iter 30 value 6010.933784
## iter 40 value 5985.265001
## iter 50 value 5979.868989
## iter 60 value 5975.110660
## iter 70 value 5973.960533
## iter 80 value 5968.546437
## iter 90 value 5961.932420
## iter 100 value 5946.386875
## final value 5946.386875
## stopped after 100 iterations
## # weights: 551
## initial value 9734.604067
## iter 10 value 9649.529011
## iter 20 value 9143.027823
## iter 30 value 9076.401701
## iter 40 value 8986.672925
## iter 50 value 8918.585958
## iter 60 value 8896.008300
## iter 70 value 8850.395539
## iter 80 value 8752.145731
## iter 90 value 8644.482881
## iter 100 value 8517.743302
## final value 8517.743302
## stopped after 100 iterations
# Print the caret fit for the node-2 TDA/PCA neural network: the resampling
# results for every size/decay pair and the combination that was selected.
Adult_TDA_PC_5.60.5_n2_NN1Fit0
## Neural Network
##
## 13933 samples
## 108 predictor
## 2 classes: ' <=50K', ' >50K'
##
## No pre-processing
## Resampling: Cross-Validated (3 fold)
## Summary of sample sizes: 9288, 9289, 9289
## Resampling results across tuning parameters:
##
## size decay Accuracy Kappa
## 2 0.3 0.6709226 0.3451319
## 2 0.5 0.6118503 0.2177579
## 2 0.7 0.6080457 0.2132917
## 3 0.3 0.6141545 0.2255223
## 3 0.5 0.6580730 0.3209352
## 3 0.7 0.6690599 0.3418958
## 5 0.3 0.6779642 0.3596250
## 5 0.5 0.6514065 0.3091927
## 5 0.7 0.6537035 0.3140368
## 7 0.3 0.5874536 0.1853705
## 7 0.5 0.6397070 0.2861448
## 7 0.7 0.6510383 0.3081234
##
## Accuracy was used to select the optimal model using the largest value.
## The final values used for the model were size = 5 and decay = 0.3.
# Show the per-fold Accuracy and Kappa stored in the fit's `resample` element.
Adult_TDA_PC_5.60.5_n2_NN1Fit0$resample
## Accuracy Kappa Resample
## 1 0.6023681 0.2148234 Fold1
## 2 0.7353575 0.4699286 Fold3
## 3 0.6961671 0.3941229 Fold2
# Keep the per-fold Accuracy as a one-column data frame; it is subtracted
# row by row from another model's fold accuracies further down.
# NOTE(review): the rows print in the order Fold1, Fold3, Fold2, so that
# row-wise pairing relies on the other model using the same row order --
# confirm.
ad_tda_pc_5.60.5_n2_nn1_fit_re <-
  Adult_TDA_PC_5.60.5_n2_NN1Fit0$resample["Accuracy"]
# Print the selected network's layout and fitted weights.
summary(Adult_TDA_PC_5.60.5_n2_NN1Fit0)
## a 108-5-1 network with 551 weights
## options were - entropy fitting decay=0.3
## b->h1 i1->h1 i2->h1 i3->h1 i4->h1 i5->h1 i6->h1 i7->h1
## -0.76 -0.01 0.20 -0.01 0.39 0.00 -1.00 0.33
## i8->h1 i9->h1 i10->h1 i11->h1 i12->h1 i13->h1 i14->h1 i15->h1
## -0.47 -0.20 0.00 0.00 -0.75 0.83 -0.08 0.01
## i16->h1 i17->h1 i18->h1 i19->h1 i20->h1 i21->h1 i22->h1 i23->h1
## 0.41 -0.11 -0.86 0.29 0.37 1.14 -0.87 -1.95
## i24->h1 i25->h1 i26->h1 i27->h1 i28->h1 i29->h1 i30->h1 i31->h1
## 0.66 0.00 0.22 -0.07 -0.01 0.49 -0.01 -1.91
## i32->h1 i33->h1 i34->h1 i35->h1 i36->h1 i37->h1 i38->h1 i39->h1
## 0.09 0.36 0.22 0.00 0.20 -0.04 -0.01 -1.03
## i40->h1 i41->h1 i42->h1 i43->h1 i44->h1 i45->h1 i46->h1 i47->h1
## 1.29 -0.37 -0.52 -0.10 -0.29 0.00 0.54 0.39
## i48->h1 i49->h1 i50->h1 i51->h1 i52->h1 i53->h1 i54->h1 i55->h1
## -1.08 0.45 -0.18 -3.00 0.95 -0.04 0.09 0.18
## i56->h1 i57->h1 i58->h1 i59->h1 i60->h1 i61->h1 i62->h1 i63->h1
## 1.05 -0.22 -0.09 1.00 0.25 -1.70 1.33 -2.09
## i64->h1 i65->h1 i66->h1 i67->h1 i68->h1 i69->h1 i70->h1 i71->h1
## 0.00 0.00 -0.05 0.16 0.01 -0.57 0.02 -0.01
## i72->h1 i73->h1 i74->h1 i75->h1 i76->h1 i77->h1 i78->h1 i79->h1
## -0.13 0.00 0.01 -0.02 -0.04 0.04 -0.58 -0.15
## i80->h1 i81->h1 i82->h1 i83->h1 i84->h1 i85->h1 i86->h1 i87->h1
## -0.01 -0.04 0.00 0.01 0.02 0.00 -0.08 -0.07
## i88->h1 i89->h1 i90->h1 i91->h1 i92->h1 i93->h1 i94->h1 i95->h1
## 0.00 -0.19 0.01 -0.03 0.00 -0.29 0.16 0.00
## i96->h1 i97->h1 i98->h1 i99->h1 i100->h1 i101->h1 i102->h1 i103->h1
## 0.00 0.14 0.01 0.02 0.26 0.00 -0.10 0.00
## i104->h1 i105->h1 i106->h1 i107->h1 i108->h1
## 0.01 0.00 0.70 -0.04 0.02
## b->h2 i1->h2 i2->h2 i3->h2 i4->h2 i5->h2 i6->h2 i7->h2
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i8->h2 i9->h2 i10->h2 i11->h2 i12->h2 i13->h2 i14->h2 i15->h2
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i16->h2 i17->h2 i18->h2 i19->h2 i20->h2 i21->h2 i22->h2 i23->h2
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i24->h2 i25->h2 i26->h2 i27->h2 i28->h2 i29->h2 i30->h2 i31->h2
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i32->h2 i33->h2 i34->h2 i35->h2 i36->h2 i37->h2 i38->h2 i39->h2
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i40->h2 i41->h2 i42->h2 i43->h2 i44->h2 i45->h2 i46->h2 i47->h2
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i48->h2 i49->h2 i50->h2 i51->h2 i52->h2 i53->h2 i54->h2 i55->h2
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i56->h2 i57->h2 i58->h2 i59->h2 i60->h2 i61->h2 i62->h2 i63->h2
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i64->h2 i65->h2 i66->h2 i67->h2 i68->h2 i69->h2 i70->h2 i71->h2
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i72->h2 i73->h2 i74->h2 i75->h2 i76->h2 i77->h2 i78->h2 i79->h2
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i80->h2 i81->h2 i82->h2 i83->h2 i84->h2 i85->h2 i86->h2 i87->h2
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i88->h2 i89->h2 i90->h2 i91->h2 i92->h2 i93->h2 i94->h2 i95->h2
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i96->h2 i97->h2 i98->h2 i99->h2 i100->h2 i101->h2 i102->h2 i103->h2
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i104->h2 i105->h2 i106->h2 i107->h2 i108->h2
## 0.00 0.00 0.00 0.00 0.00
## b->h3 i1->h3 i2->h3 i3->h3 i4->h3 i5->h3 i6->h3 i7->h3
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i8->h3 i9->h3 i10->h3 i11->h3 i12->h3 i13->h3 i14->h3 i15->h3
## 0.00 0.00 0.00 0.01 0.00 0.00 0.00 0.00
## i16->h3 i17->h3 i18->h3 i19->h3 i20->h3 i21->h3 i22->h3 i23->h3
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i24->h3 i25->h3 i26->h3 i27->h3 i28->h3 i29->h3 i30->h3 i31->h3
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i32->h3 i33->h3 i34->h3 i35->h3 i36->h3 i37->h3 i38->h3 i39->h3
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i40->h3 i41->h3 i42->h3 i43->h3 i44->h3 i45->h3 i46->h3 i47->h3
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i48->h3 i49->h3 i50->h3 i51->h3 i52->h3 i53->h3 i54->h3 i55->h3
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i56->h3 i57->h3 i58->h3 i59->h3 i60->h3 i61->h3 i62->h3 i63->h3
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i64->h3 i65->h3 i66->h3 i67->h3 i68->h3 i69->h3 i70->h3 i71->h3
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i72->h3 i73->h3 i74->h3 i75->h3 i76->h3 i77->h3 i78->h3 i79->h3
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i80->h3 i81->h3 i82->h3 i83->h3 i84->h3 i85->h3 i86->h3 i87->h3
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i88->h3 i89->h3 i90->h3 i91->h3 i92->h3 i93->h3 i94->h3 i95->h3
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i96->h3 i97->h3 i98->h3 i99->h3 i100->h3 i101->h3 i102->h3 i103->h3
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i104->h3 i105->h3 i106->h3 i107->h3 i108->h3
## 0.00 0.00 0.00 0.00 0.00
## b->h4 i1->h4 i2->h4 i3->h4 i4->h4 i5->h4 i6->h4 i7->h4
## -0.22 0.31 -0.06 0.13 0.12 0.00 0.04 0.05
## i8->h4 i9->h4 i10->h4 i11->h4 i12->h4 i13->h4 i14->h4 i15->h4
## -0.50 0.00 0.00 0.00 -0.05 -0.14 -0.05 0.05
## i16->h4 i17->h4 i18->h4 i19->h4 i20->h4 i21->h4 i22->h4 i23->h4
## 0.06 0.03 -0.02 -0.04 -0.01 0.57 0.04 -0.67
## i24->h4 i25->h4 i26->h4 i27->h4 i28->h4 i29->h4 i30->h4 i31->h4
## 0.44 0.00 -0.04 -0.37 1.70 0.23 0.00 -0.69
## i32->h4 i33->h4 i34->h4 i35->h4 i36->h4 i37->h4 i38->h4 i39->h4
## 0.03 0.15 0.01 0.05 -0.06 0.08 0.02 -0.36
## i40->h4 i41->h4 i42->h4 i43->h4 i44->h4 i45->h4 i46->h4 i47->h4
## 0.34 -0.23 -0.13 -0.22 -0.17 0.00 0.60 0.02
## i48->h4 i49->h4 i50->h4 i51->h4 i52->h4 i53->h4 i54->h4 i55->h4
## 0.23 -0.11 -0.22 -1.17 0.31 0.03 0.02 0.09
## i56->h4 i57->h4 i58->h4 i59->h4 i60->h4 i61->h4 i62->h4 i63->h4
## 0.51 -0.07 -0.12 0.07 0.00 -0.10 0.61 -0.83
## i64->h4 i65->h4 i66->h4 i67->h4 i68->h4 i69->h4 i70->h4 i71->h4
## 0.12 -0.32 -0.52 -0.12 -0.01 0.04 -0.06 -0.01
## i72->h4 i73->h4 i74->h4 i75->h4 i76->h4 i77->h4 i78->h4 i79->h4
## -0.04 0.00 0.00 0.07 0.01 0.00 0.01 -0.01
## i80->h4 i81->h4 i82->h4 i83->h4 i84->h4 i85->h4 i86->h4 i87->h4
## 0.00 0.06 0.00 0.00 -0.09 0.00 0.03 0.02
## i88->h4 i89->h4 i90->h4 i91->h4 i92->h4 i93->h4 i94->h4 i95->h4
## 0.01 0.01 0.00 0.02 0.00 0.03 0.02 0.00
## i96->h4 i97->h4 i98->h4 i99->h4 i100->h4 i101->h4 i102->h4 i103->h4
## 0.01 -0.01 0.01 0.00 0.02 0.00 0.01 0.01
## i104->h4 i105->h4 i106->h4 i107->h4 i108->h4
## 0.00 0.00 -0.25 -0.01 0.00
## b->h5 i1->h5 i2->h5 i3->h5 i4->h5 i5->h5 i6->h5 i7->h5
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i8->h5 i9->h5 i10->h5 i11->h5 i12->h5 i13->h5 i14->h5 i15->h5
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i16->h5 i17->h5 i18->h5 i19->h5 i20->h5 i21->h5 i22->h5 i23->h5
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i24->h5 i25->h5 i26->h5 i27->h5 i28->h5 i29->h5 i30->h5 i31->h5
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i32->h5 i33->h5 i34->h5 i35->h5 i36->h5 i37->h5 i38->h5 i39->h5
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i40->h5 i41->h5 i42->h5 i43->h5 i44->h5 i45->h5 i46->h5 i47->h5
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i48->h5 i49->h5 i50->h5 i51->h5 i52->h5 i53->h5 i54->h5 i55->h5
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i56->h5 i57->h5 i58->h5 i59->h5 i60->h5 i61->h5 i62->h5 i63->h5
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i64->h5 i65->h5 i66->h5 i67->h5 i68->h5 i69->h5 i70->h5 i71->h5
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i72->h5 i73->h5 i74->h5 i75->h5 i76->h5 i77->h5 i78->h5 i79->h5
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i80->h5 i81->h5 i82->h5 i83->h5 i84->h5 i85->h5 i86->h5 i87->h5
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i88->h5 i89->h5 i90->h5 i91->h5 i92->h5 i93->h5 i94->h5 i95->h5
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i96->h5 i97->h5 i98->h5 i99->h5 i100->h5 i101->h5 i102->h5 i103->h5
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i104->h5 i105->h5 i106->h5 i107->h5 i108->h5
## 0.00 0.00 0.00 0.00 0.00
## b->o h1->o h2->o h3->o h4->o h5->o
## 0.36 3.95 0.36 0.38 -1.44 0.42
# Plot the 25 most important predictors of the selected node-2 network.
# The title spells "Assisted" correctly (it used to read "Assited").
vip(Adult_TDA_PC_5.60.5_n2_NN1Fit0, num_features = 25) +
  ggtitle("Adult_TDA_PCA_5.60.5_n2_NN1Fit TDA-Assisted NN")

# Score the testing data with the node-2 TDA/PCA network fitted on the
# training data.
pred0 <- predict(
  Adult_TDA_PC_5.60.5_n2_NN1Fit0,
  newdata = adult.one_hot_df4Test
)
# Cross-tabulate the predictions against the observed class in the test data
# and print the resulting performance statistics.
ad_tda_pc_5.60.5_n2_nn1_cf0 <- confusionMatrix(
  data = pred0,
  reference = as.factor(adult.one_hot_df4Test$adult_df1)
)
ad_tda_pc_5.60.5_n2_nn1_cf0
## Confusion Matrix and Statistics
##
## Reference
## Prediction <=50K >50K
## <=50K 3895 1431
## >50K 3521 921
##
## Accuracy : 0.493
## 95% CI : (0.4831, 0.503)
## No Information Rate : 0.7592
## P-Value [Acc > NIR] : 1
##
## Kappa : -0.0638
##
## Mcnemar's Test P-Value : <2e-16
##
## Sensitivity : 0.5252
## Specificity : 0.3916
## Pos Pred Value : 0.7313
## Neg Pred Value : 0.2073
## Prevalence : 0.7592
## Detection Rate : 0.3988
## Detection Prevalence : 0.5452
## Balanced Accuracy : 0.4584
##
## 'Positive' Class : <=50K
##
# Print the test-set confusion matrix again (the same object was already
# printed right after it was created).
ad_tda_pc_5.60.5_n2_nn1_cf0
## Confusion Matrix and Statistics
##
## Reference
## Prediction <=50K >50K
## <=50K 3895 1431
## >50K 3521 921
##
## Accuracy : 0.493
## 95% CI : (0.4831, 0.503)
## No Information Rate : 0.7592
## P-Value [Acc > NIR] : 1
##
## Kappa : -0.0638
##
## Mcnemar's Test P-Value : <2e-16
##
## Sensitivity : 0.5252
## Specificity : 0.3916
## Pos Pred Value : 0.7313
## Neg Pred Value : 0.2073
## Prevalence : 0.7592
## Detection Rate : 0.3988
## Detection Prevalence : 0.5452
## Balanced Accuracy : 0.4584
##
## 'Positive' Class : <=50K
##
# Overall test-set statistics: accuracy, kappa, accuracy bounds and p-values.
ad_tda_pc_5.60.5_n2_nn1_cf0$overall
## Accuracy Kappa AccuracyLower AccuracyUpper AccuracyNull
## 4.930385e-01 -6.383549e-02 4.830761e-01 5.030051e-01 7.592138e-01
## AccuracyPValue McnemarPValue
## 1.000000e+00 1.172644e-193
# Store the overall test-set accuracy (first element of `overall`) as a
# named length-one vector.
ad_tda_pc_5.60.5_n2_nn1_cf0_ov_acc <-
  ad_tda_pc_5.60.5_n2_nn1_cf0$overall["Accuracy"]
# Print the class-level statistics (sensitivity, specificity, precision, ...).
ad_tda_pc_5.60.5_n2_nn1_cf0[["byClass"]]
## Sensitivity Specificity Pos Pred Value
## 0.5252157 0.3915816 0.7313181
## Neg Pred Value Precision Recall
## 0.2073390 0.7313181 0.5252157
## F1 Prevalence Detection Rate
## 0.6113640 0.7592138 0.3987510
## Detection Prevalence Balanced Accuracy
## 0.5452498 0.4583987
# Keep precision, recall and F1, i.e. elements 5 to 7 of `byClass`.
ad_tda_pc_5.60.5_n2_nn1_cf0_pre_rec_f1 <-
  ad_tda_pc_5.60.5_n2_nn1_cf0$byClass[c("Precision", "Recall", "F1")]
###### Conduct initial Bayesian tests of non-tda-assisted NN vs. tda-assisted NN classifiers
### 3-fold diff
# Per-fold accuracy difference: `ad_nn1_fit_re` (presumably the non-TDA
# network's fold accuracies, defined outside this section) minus the
# TDA-assisted node-2 network. The outer parentheses auto-print the result.
(diff_tda_pca_5.60.5_nn1_n2_3_fold <-
  ad_nn1_fit_re - ad_tda_pc_5.60.5_n2_nn1_fit_re)
## Accuracy
## 1 0.2055824
## 2 0.1147347
## 3 0.1052280
## Bayesian tests on the 3-fold accuracy differences
# Bayesian sign test; -0.01 and 0.01 are the lower and upper limits passed
# for the "rope" region reported in the result.
bst_tda_pca_5.60.5_nn1.n2_3_fold <- BayesianSignTest(
  as.matrix(diff_tda_pca_5.60.5_nn1_n2_3_fold),
  -0.01,
  0.01
)
bst_tda_pca_5.60.5_nn1.n2_3_fold
## $probLeft
## [1] 0
##
## $probRope
## [1] 0.25
##
## $probRight
## [1] 0.75
# Sign-test odds of "left" against "right": probLeft / probRight.
bst_tda_pca_5.60.5_nn1.n2_3_fold_odds.left <- with(
  bst_tda_pca_5.60.5_nn1.n2_3_fold,
  probLeft / probRight
)
bst_tda_pca_5.60.5_nn1.n2_3_fold_odds.left
## [1] 0
# Bayesian signed-rank test on the same fold differences and interval limits.
# The outer parentheses auto-print the assigned result.
(bsr_tda_pca_5.60.5_nn1.n2_3_fold <- BayesianSignedRank(
  as.matrix(diff_tda_pca_5.60.5_nn1_n2_3_fold), -0.01, 0.01
))
## $winLeft
## [1] 0
##
## $winRope
## [1] 0.009666667
##
## $winRight
## [1] 0.9903333
# Bayesian correlated t-test on the per-fold accuracy differences.
# The second argument is the correlation between cross-validation folds,
# which for k-fold cross-validation is 1 / k. These models were resampled
# with 3 folds, so 1 / 3 is used; the earlier value 0.1 corresponds to
# 10 folds and understates the correlation.
# The last two arguments are the lower and upper interval limits.
# NOTE: output recorded beneath this call was produced with 0.1 and has to be
# regenerated.
bct_tda_pca_5.60.5_nn1.n2_3_fold <- correlatedBayesianTtest(
  as.matrix(diff_tda_pca_5.60.5_nn1_n2_3_fold),
  1 / 3,
  -0.01,
  0.01
)
bct_tda_pca_5.60.5_nn1.n2_3_fold
## $left
## [1] 0.02718863
##
## $rope
## [1] 0.007957845
##
## $right
## [1] 0.9648535
# ROPE plot of the fold differences for the interval [-0.01, 0.01].
plot(
  rope(diff_tda_pca_5.60.5_nn1_n2_3_fold, c(-0.01, 0.01))
)

# Bayes factor (disabled)
#bf_tda_pca_5.60.5_nn1.n2_3_fold = ttestBF(x = as.matrix(diff_tda_pca_5.60.5_nn1_n2_3_fold))
#bf_tda_pca_5.60.5_nn1.n2_3_fold
# One-sample t-test of the mean fold difference against zero.
t.test(x = as.matrix(diff_tda_pca_5.60.5_nn1_n2_3_fold))
##
## One Sample t-test
##
## data: as.matrix(diff_tda_pca_5.60.5_nn1_n2_3_fold)
## t = 4.4348, df = 2, p-value = 0.04727
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
## 0.004228177 0.279468526
## sample estimates:
## mean of x
## 0.1418484
### Test set diff
# Difference in overall test-set accuracy: `nn1_cf_ov_acc` minus the
# TDA-assisted node-2 network. This is a single value, not one per fold.
diff_tda_pca_5.60.5_nn1.n2_test <-
  nn1_cf_ov_acc - ad_tda_pc_5.60.5_n2_nn1_cf0_ov_acc
diff_tda_pca_5.60.5_nn1.n2_test
## Accuracy
## 0.3120393
## Bayesian tests on the test-set accuracy difference
# Bayesian sign test with interval limits -0.01 and 0.01, applied to the
# single test-set difference. The outer parentheses auto-print the result.
(bst_tda_pca_5.60.5_nn1.n2_test <- BayesianSignTest(
  as.matrix(diff_tda_pca_5.60.5_nn1.n2_test), -0.01, 0.01
))
## $probLeft
## [1] 0
##
## $probRope
## [1] 0.5
##
## $probRight
## [1] 0.5
# Sign-test odds of "left" against "right" for the test-set difference.
bst_tda_pca_5.60.5_nn1.n2_test_odds.left <-
  bst_tda_pca_5.60.5_nn1.n2_test[["probLeft"]] /
  bst_tda_pca_5.60.5_nn1.n2_test[["probRight"]]
bst_tda_pca_5.60.5_nn1.n2_test_odds.left
## [1] 0
# Bayesian signed-rank test on the single test-set difference, using the
# same interval limits as above.
bsr_tda_pca_5.60.5_nn1.n2_test <- BayesianSignedRank(
  as.matrix(diff_tda_pca_5.60.5_nn1.n2_test),
  -0.01,
  0.01
)
bsr_tda_pca_5.60.5_nn1.n2_test
## $winLeft
## [1] 0
##
## $winRope
## [1] 0.1595
##
## $winRight
## [1] 0.8405
# Bayesian correlated t-test on the test-set difference.
# NOTE(review): the recorded result is NA for left, rope and right --
# presumably because a single difference gives no estimate of spread; confirm
# whether this test is meaningful here.
# The outer parentheses auto-print the assigned result.
(bct_tda_pca_5.60.5_nn1.n2_test <- correlatedBayesianTtest(
  as.matrix(diff_tda_pca_5.60.5_nn1.n2_test), 0.1, -0.01, 0.01
))
## $left
## [1] NA
##
## $rope
## [1] NA
##
## $right
## [1] NA
# Rope Plot
#plot(rope(diff_tda_pca_5.60.5_nn1.n2_test,c(-0.01,0.01)))
#BayesFactor
#bf_tda_pca_5.60.5_nn1.n2_test = ttestBF(x = as.matrix(diff_tda_pca_5.60.5_nn1.n2_test))
#bf_tda_pca_5.60.5_nn1.n2_test
#t_test
#t.test(as.matrix(diff_tda_pca_5.60.5_nn1.n2_test))
##With TDA PCA filter 5 intervals, 50% overlap, 5 bins
# NOTE(review): the object names carry "5.60.5", which reads like a 60%
# overlap -- confirm which value is correct.
##Node3
#Neural Network 1
# Tune a caret "nnet" model on the node-3 data with the shared resampling
# control (`fitControl`) and tuning grid (`nn1Grid`), selecting by Accuracy.
# `Importance` is not a named argument of train() and is forwarded through
# `...`; it is kept from the original call but written as TRUE, because `T`
# is an ordinary variable that can be reassigned.
# NOTE(review): most fits logged in this file stop at the 100-iteration limit
# without converging -- consider a larger `maxit`, and `trace = FALSE` to
# silence the per-iteration log.
Adult_TDA_PC_5.60.5_n3_NN1Fit0 <- train(
  as.factor(adult_df1) ~ .,
  data = tda.m_adult_5.60.5.n3.vec,
  Importance = TRUE,
  method = "nnet",
  trControl = fitControl,
  tuneGrid = nn1Grid,
  metric = "Accuracy"
)
## # weights: 221
## initial value 8020.772569
## iter 10 value 5969.530145
## iter 20 value 5968.527188
## iter 30 value 5949.360754
## iter 40 value 5908.199289
## iter 50 value 5883.980449
## iter 60 value 5595.301300
## iter 70 value 5364.202127
## iter 80 value 5143.340234
## iter 90 value 4881.575685
## iter 100 value 4787.072563
## final value 4787.072563
## stopped after 100 iterations
## # weights: 331
## initial value 7876.574395
## iter 10 value 5640.144786
## iter 20 value 5609.583173
## iter 30 value 5528.735964
## iter 40 value 5460.248702
## iter 50 value 5437.382420
## iter 60 value 5429.217403
## iter 70 value 5407.786755
## iter 80 value 5400.048523
## iter 90 value 5381.470321
## iter 100 value 5347.146450
## final value 5347.146450
## stopped after 100 iterations
## # weights: 551
## initial value 9682.572812
## iter 10 value 5968.539665
## iter 20 value 5968.445277
## iter 30 value 5967.123101
## iter 40 value 5824.993754
## iter 50 value 5653.073291
## iter 60 value 5618.543443
## iter 70 value 5580.653303
## iter 80 value 5575.313823
## iter 90 value 5473.017319
## iter 100 value 5442.425101
## final value 5442.425101
## stopped after 100 iterations
## # weights: 771
## initial value 6814.682943
## iter 10 value 5967.890203
## iter 20 value 5870.847057
## iter 30 value 5715.317568
## iter 40 value 5648.507585
## iter 50 value 5626.354075
## iter 60 value 5593.885581
## iter 70 value 5567.226873
## iter 80 value 5554.566383
## iter 90 value 5504.902192
## iter 100 value 5487.933203
## final value 5487.933203
## stopped after 100 iterations
## # weights: 221
## initial value 6595.657007
## iter 10 value 5967.116369
## iter 20 value 5924.877302
## iter 30 value 5887.341950
## iter 40 value 5468.628120
## iter 50 value 5433.142966
## iter 60 value 5353.283782
## iter 70 value 5306.936054
## iter 80 value 5251.047783
## iter 90 value 5115.378075
## iter 100 value 4966.987226
## final value 4966.987226
## stopped after 100 iterations
## # weights: 331
## initial value 9822.068219
## iter 10 value 5691.734051
## iter 20 value 5639.033781
## iter 30 value 5626.230582
## iter 40 value 5626.097934
## iter 50 value 5618.610550
## iter 60 value 5614.315605
## iter 70 value 5493.088629
## iter 80 value 5435.521515
## iter 90 value 5420.314009
## iter 100 value 5414.200725
## final value 5414.200725
## stopped after 100 iterations
## # weights: 551
## initial value 6369.122030
## iter 10 value 5674.731060
## iter 20 value 5597.650283
## iter 30 value 5503.012658
## iter 40 value 5471.541590
## iter 50 value 5446.189193
## iter 60 value 5422.698886
## iter 70 value 5414.955533
## iter 80 value 5409.760974
## iter 90 value 5406.034079
## iter 100 value 5386.793622
## final value 5386.793622
## stopped after 100 iterations
## # weights: 771
## initial value 6625.754248
## iter 10 value 5959.277333
## iter 20 value 5947.357128
## iter 30 value 5638.193742
## iter 40 value 5619.010973
## iter 50 value 5588.253177
## iter 60 value 5566.801060
## iter 70 value 5505.422533
## iter 80 value 5470.497425
## iter 90 value 5427.103308
## iter 100 value 5397.914398
## final value 5397.914398
## stopped after 100 iterations
## # weights: 221
## initial value 6221.898355
## iter 10 value 5948.613396
## iter 20 value 5632.550362
## iter 30 value 5632.469518
## iter 40 value 5632.320691
## iter 50 value 5632.213129
## iter 60 value 5630.055002
## iter 70 value 5624.918082
## iter 80 value 5624.863679
## final value 5624.863478
## converged
## # weights: 331
## initial value 6990.937798
## iter 10 value 5804.076383
## iter 20 value 5632.940775
## iter 30 value 5627.105931
## iter 40 value 5624.760299
## final value 5624.693708
## converged
## # weights: 551
## initial value 6474.331245
## iter 10 value 5956.137700
## iter 20 value 5803.072780
## iter 30 value 5638.391208
## iter 40 value 5621.514774
## iter 50 value 5608.477175
## iter 60 value 5538.997001
## iter 70 value 5522.673354
## iter 80 value 5521.079645
## iter 90 value 5483.089790
## iter 100 value 5465.016574
## final value 5465.016574
## stopped after 100 iterations
## # weights: 771
## initial value 6138.362418
## iter 10 value 5911.694695
## iter 20 value 5628.580252
## iter 30 value 5619.693358
## iter 40 value 5505.470256
## iter 50 value 5451.128658
## iter 60 value 5448.696480
## iter 70 value 5446.780985
## iter 80 value 5440.907180
## iter 90 value 5419.116285
## iter 100 value 5406.536031
## final value 5406.536031
## stopped after 100 iterations
## # weights: 221
## initial value 7610.209014
## iter 10 value 5968.220595
## final value 5968.220076
## converged
## # weights: 331
## initial value 6548.381792
## iter 10 value 5845.150439
## iter 20 value 5638.747057
## iter 30 value 5636.173762
## iter 40 value 5629.936031
## iter 50 value 5463.446678
## iter 60 value 5453.469623
## iter 70 value 5441.200910
## iter 80 value 5420.306589
## iter 90 value 5411.196803
## iter 100 value 5392.098701
## final value 5392.098701
## stopped after 100 iterations
## # weights: 551
## initial value 6027.309520
## iter 10 value 5756.350278
## iter 20 value 5726.352466
## iter 30 value 5637.089391
## iter 40 value 5624.366500
## iter 50 value 5618.492974
## iter 60 value 5615.065191
## iter 70 value 5590.170613
## iter 80 value 5495.265814
## iter 90 value 5454.032391
## iter 100 value 5440.840438
## final value 5440.840438
## stopped after 100 iterations
## # weights: 771
## initial value 5873.644743
## iter 10 value 5729.978150
## iter 20 value 5667.010678
## iter 30 value 5632.796459
## iter 40 value 5625.385661
## iter 50 value 5618.687273
## iter 60 value 5609.563928
## iter 70 value 5605.323436
## iter 80 value 5603.329805
## iter 90 value 5600.719730
## iter 100 value 5595.158221
## final value 5595.158221
## stopped after 100 iterations
## # weights: 221
## initial value 8691.966473
## iter 10 value 5974.213633
## iter 20 value 5968.590062
## iter 30 value 5955.819590
## iter 40 value 5927.948423
## iter 50 value 5871.144425
## iter 60 value 5641.776286
## iter 70 value 5634.417847
## iter 80 value 5627.397930
## iter 90 value 5606.597580
## iter 100 value 5567.276285
## final value 5567.276285
## stopped after 100 iterations
## # weights: 331
## initial value 7843.424471
## iter 10 value 5783.996502
## iter 20 value 5757.785028
## iter 30 value 5653.971440
## iter 40 value 5622.346296
## iter 50 value 5580.269421
## iter 60 value 5530.667997
## iter 70 value 5474.085177
## iter 80 value 5466.030870
## iter 90 value 5451.715745
## iter 100 value 5428.636545
## final value 5428.636545
## stopped after 100 iterations
## # weights: 551
## initial value 6564.611784
## iter 10 value 5675.398252
## iter 20 value 5666.110152
## iter 30 value 5633.004713
## iter 40 value 5631.046225
## iter 50 value 5630.970996
## final value 5630.880497
## converged
## # weights: 771
## initial value 6022.606048
## iter 10 value 5916.629676
## iter 20 value 5633.097933
## iter 30 value 5565.838806
## iter 40 value 5470.896167
## iter 50 value 5402.817826
## iter 60 value 5346.790532
## iter 70 value 5286.162544
## iter 80 value 5228.661826
## iter 90 value 5051.226675
## iter 100 value 5020.416558
## final value 5020.416558
## stopped after 100 iterations
## # weights: 221
## initial value 6654.266264
## iter 10 value 5678.136076
## iter 20 value 5599.154788
## iter 30 value 5496.907527
## iter 40 value 5366.121848
## iter 50 value 5168.405804
## iter 60 value 5044.237213
## iter 70 value 4963.531901
## iter 80 value 4917.767250
## iter 90 value 4907.046156
## iter 100 value 4904.876208
## final value 4904.876208
## stopped after 100 iterations
## # weights: 331
## initial value 7008.732903
## iter 10 value 5944.072750
## iter 20 value 5898.770705
## iter 30 value 5619.574242
## iter 40 value 5517.730747
## iter 50 value 5492.725748
## iter 60 value 5451.921020
## iter 70 value 5399.450357
## iter 80 value 5297.230861
## iter 90 value 5286.166898
## iter 100 value 5018.336700
## final value 5018.336700
## stopped after 100 iterations
## # weights: 551
## initial value 6201.212600
## iter 10 value 5876.233384
## iter 20 value 5851.300905
## iter 30 value 5829.512545
## iter 40 value 5812.909622
## iter 50 value 5797.217905
## iter 60 value 5608.915380
## iter 70 value 5592.520752
## iter 80 value 5528.180643
## iter 90 value 5506.520955
## iter 100 value 5467.342440
## final value 5467.342440
## stopped after 100 iterations
## # weights: 771
## initial value 8065.857984
## iter 10 value 5939.058154
## iter 20 value 5720.843351
## iter 30 value 5660.216350
## iter 40 value 5612.246543
## iter 50 value 5477.873322
## iter 60 value 5276.274438
## iter 70 value 5083.850700
## iter 80 value 4972.950815
## iter 90 value 4958.500144
## iter 100 value 4936.394109
## final value 4936.394109
## stopped after 100 iterations
## # weights: 221
## initial value 6950.067207
## iter 10 value 5962.894980
## iter 20 value 5755.706187
## iter 30 value 5633.369813
## iter 40 value 5583.349387
## iter 50 value 5495.373832
## iter 60 value 5190.031850
## iter 70 value 4988.992786
## iter 80 value 4910.609026
## iter 90 value 4890.584909
## iter 100 value 4862.887154
## final value 4862.887154
## stopped after 100 iterations
## # weights: 331
## initial value 6690.899890
## iter 10 value 5969.821668
## iter 20 value 5742.348079
## iter 30 value 5722.514608
## iter 40 value 5716.129776
## iter 50 value 5595.145932
## iter 60 value 5559.459363
## iter 70 value 5536.427363
## iter 80 value 5325.604897
## iter 90 value 5263.336592
## iter 100 value 5207.810575
## final value 5207.810575
## stopped after 100 iterations
## # weights: 551
## initial value 6096.903000
## iter 10 value 5920.598322
## iter 20 value 5846.243371
## iter 30 value 5840.898056
## iter 40 value 5661.747264
## iter 50 value 5555.054417
## iter 60 value 5464.375650
## iter 70 value 5454.411016
## iter 80 value 5377.527365
## iter 90 value 5255.203771
## iter 100 value 5123.974506
## final value 5123.974506
## stopped after 100 iterations
## # weights: 771
## initial value 8409.407184
## iter 10 value 5718.947784
## iter 20 value 5634.824295
## iter 30 value 5632.173545
## iter 40 value 5607.226599
## iter 50 value 5586.977947
## iter 60 value 5501.854444
## iter 70 value 5480.435299
## iter 80 value 5465.007973
## iter 90 value 5447.756587
## iter 100 value 5415.456344
## final value 5415.456344
## stopped after 100 iterations
## # weights: 221
## initial value 6165.208528
## iter 10 value 5972.209202
## iter 20 value 5969.924671
## iter 30 value 5969.897823
## final value 5969.897562
## converged
## # weights: 331
## initial value 6549.820422
## iter 10 value 5890.338872
## iter 20 value 5645.157301
## iter 30 value 5634.287349
## iter 40 value 5607.457017
## iter 50 value 5567.383552
## iter 60 value 5518.423522
## iter 70 value 5391.740372
## iter 80 value 5199.419277
## iter 90 value 4976.569854
## iter 100 value 4870.380987
## final value 4870.380987
## stopped after 100 iterations
## # weights: 551
## initial value 7330.249099
## iter 10 value 5892.129820
## iter 20 value 5533.595020
## iter 30 value 5501.608586
## iter 40 value 5464.137084
## iter 50 value 5445.127185
## iter 60 value 5418.026048
## iter 70 value 5399.933197
## iter 80 value 5307.195977
## iter 90 value 5263.271005
## iter 100 value 5086.796314
## final value 5086.796314
## stopped after 100 iterations
## # weights: 771
## initial value 12686.400280
## iter 10 value 6034.620541
## iter 20 value 5916.354522
## iter 30 value 5661.976587
## iter 30 value 5661.976539
## final value 5661.976539
## converged
## # weights: 221
## initial value 7450.176285
## iter 10 value 5801.253272
## iter 20 value 5648.159964
## iter 30 value 5499.611931
## iter 40 value 5164.368211
## iter 50 value 4973.680224
## iter 60 value 4929.933992
## iter 70 value 4835.669513
## iter 80 value 4801.836057
## iter 90 value 4797.302964
## iter 100 value 4788.286016
## final value 4788.286016
## stopped after 100 iterations
## # weights: 331
## initial value 6080.134781
## iter 10 value 5959.236308
## iter 20 value 5788.947435
## iter 30 value 5628.053331
## iter 40 value 5624.691122
## iter 50 value 5578.642864
## iter 60 value 5495.122259
## iter 70 value 5462.645550
## iter 80 value 5457.193310
## iter 90 value 5443.629546
## iter 100 value 5420.272463
## final value 5420.272463
## stopped after 100 iterations
## # weights: 551
## initial value 7671.379250
## iter 10 value 5948.994083
## iter 20 value 5591.363783
## iter 30 value 5576.531125
## iter 40 value 5569.431481
## iter 50 value 5487.465457
## iter 60 value 5454.462363
## iter 70 value 5417.862955
## iter 80 value 5412.925919
## iter 90 value 5383.293368
## iter 100 value 5335.996121
## final value 5335.996121
## stopped after 100 iterations
## # weights: 771
## initial value 7316.881523
## iter 10 value 5942.311980
## iter 20 value 5641.847721
## iter 30 value 5605.997944
## iter 40 value 5595.471814
## iter 50 value 5501.505453
## iter 60 value 5460.656838
## iter 70 value 5433.652739
## iter 80 value 5424.730884
## iter 90 value 5422.933560
## iter 100 value 5390.531166
## final value 5390.531166
## stopped after 100 iterations
## # weights: 331
## initial value 11291.681310
## iter 10 value 8856.091618
## iter 20 value 8837.702052
## iter 30 value 8825.006643
## iter 40 value 8817.134785
## iter 50 value 8813.512309
## iter 60 value 8505.906210
## iter 70 value 8474.726896
## iter 80 value 8320.565169
## iter 90 value 8197.618345
## iter 100 value 7989.611255
## final value 7989.611255
## stopped after 100 iterations
# Display the resampling/tuning summary of the n3 TDA-assisted neural network fit.
print(Adult_TDA_PC_5.60.5_n3_NN1Fit0)
## Neural Network
##
## 15744 samples
## 108 predictor
## 2 classes: ' <=50K', ' >50K'
##
## No pre-processing
## Resampling: Cross-Validated (3 fold)
## Summary of sample sizes: 10496, 10495, 10497
## Resampling results across tuning parameters:
##
## size decay Accuracy Kappa
## 2 0.3 0.7679138 0.1700041
## 2 0.5 0.7675284 0.1765438
## 2 0.7 0.7799795 0.2429999
## 3 0.3 0.7802334 0.2374957
## 3 0.5 0.7816310 0.2406777
## 3 0.7 0.7784553 0.2129553
## 5 0.3 0.7811863 0.2456170
## 5 0.5 0.7763594 0.2310306
## 5 0.7 0.7764234 0.2005918
## 7 0.3 0.7764233 0.1971602
## 7 0.5 0.7740087 0.2276808
## 7 0.7 0.7813768 0.2535427
##
## Accuracy was used to select the optimal model using the largest value.
## The final values used for the model were size = 3 and decay = 0.5.
# Per-fold Accuracy and Kappa of the selected n3 model.
Adult_TDA_PC_5.60.5_n3_NN1Fit0[["resample"]]
## Accuracy Kappa Resample
## 1 0.7821612 0.2978058 Fold3
## 2 0.7835778 0.2233166 Fold2
## 3 0.7791540 0.2009106 Fold1
# Keep the per-fold accuracy of the selected n3 model as a one-column data frame.
# The column is selected by name rather than by position so the intent is
# explicit and the code does not depend on the column order of `resample`.
# NOTE(review): the rows are in the order caret returned them (printed above as
# Fold3, Fold2, Fold1), not in fold order -- confirm they line up with the
# baseline resamples before subtracting the two data frames row by row.
ad_tda_pc_5.60.5_n3_nn1_fit_re <- Adult_TDA_PC_5.60.5_n3_NN1Fit0$resample["Accuracy"]
# Print the architecture and fitted weights of the final network.
summary(Adult_TDA_PC_5.60.5_n3_NN1Fit0)
## a 108-3-1 network with 331 weights
## options were - entropy fitting decay=0.5
## b->h1 i1->h1 i2->h1 i3->h1 i4->h1 i5->h1 i6->h1 i7->h1
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i8->h1 i9->h1 i10->h1 i11->h1 i12->h1 i13->h1 i14->h1 i15->h1
## 0.00 0.00 0.00 0.17 0.00 0.00 0.00 0.00
## i16->h1 i17->h1 i18->h1 i19->h1 i20->h1 i21->h1 i22->h1 i23->h1
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i24->h1 i25->h1 i26->h1 i27->h1 i28->h1 i29->h1 i30->h1 i31->h1
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i32->h1 i33->h1 i34->h1 i35->h1 i36->h1 i37->h1 i38->h1 i39->h1
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i40->h1 i41->h1 i42->h1 i43->h1 i44->h1 i45->h1 i46->h1 i47->h1
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i48->h1 i49->h1 i50->h1 i51->h1 i52->h1 i53->h1 i54->h1 i55->h1
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i56->h1 i57->h1 i58->h1 i59->h1 i60->h1 i61->h1 i62->h1 i63->h1
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i64->h1 i65->h1 i66->h1 i67->h1 i68->h1 i69->h1 i70->h1 i71->h1
## -0.01 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i72->h1 i73->h1 i74->h1 i75->h1 i76->h1 i77->h1 i78->h1 i79->h1
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i80->h1 i81->h1 i82->h1 i83->h1 i84->h1 i85->h1 i86->h1 i87->h1
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i88->h1 i89->h1 i90->h1 i91->h1 i92->h1 i93->h1 i94->h1 i95->h1
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i96->h1 i97->h1 i98->h1 i99->h1 i100->h1 i101->h1 i102->h1 i103->h1
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i104->h1 i105->h1 i106->h1 i107->h1 i108->h1
## 0.00 0.00 0.00 0.00 0.00
## b->h2 i1->h2 i2->h2 i3->h2 i4->h2 i5->h2 i6->h2 i7->h2
## 0.02 2.20 0.00 0.11 0.08 0.00 -0.05 0.00
## i8->h2 i9->h2 i10->h2 i11->h2 i12->h2 i13->h2 i14->h2 i15->h2
## -0.12 0.00 0.00 -0.50 0.00 0.00 0.00 0.00
## i16->h2 i17->h2 i18->h2 i19->h2 i20->h2 i21->h2 i22->h2 i23->h2
## 0.00 0.07 0.00 0.00 -0.17 0.02 0.00 0.00
## i24->h2 i25->h2 i26->h2 i27->h2 i28->h2 i29->h2 i30->h2 i31->h2
## 0.00 0.00 0.00 0.10 -0.34 -0.01 0.00 0.00
## i32->h2 i33->h2 i34->h2 i35->h2 i36->h2 i37->h2 i38->h2 i39->h2
## 0.00 0.02 0.00 0.00 0.00 0.18 0.00 0.00
## i40->h2 i41->h2 i42->h2 i43->h2 i44->h2 i45->h2 i46->h2 i47->h2
## 0.00 -0.12 0.00 -0.02 0.04 0.00 -0.01 -0.05
## i48->h2 i49->h2 i50->h2 i51->h2 i52->h2 i53->h2 i54->h2 i55->h2
## 0.00 0.00 0.00 0.01 0.01 0.00 0.00 0.00
## i56->h2 i57->h2 i58->h2 i59->h2 i60->h2 i61->h2 i62->h2 i63->h2
## 0.00 0.00 0.00 -0.05 0.00 0.06 0.00 0.01
## i64->h2 i65->h2 i66->h2 i67->h2 i68->h2 i69->h2 i70->h2 i71->h2
## 6.03 0.00 -0.91 0.00 0.00 0.00 0.00 0.00
## i72->h2 i73->h2 i74->h2 i75->h2 i76->h2 i77->h2 i78->h2 i79->h2
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i80->h2 i81->h2 i82->h2 i83->h2 i84->h2 i85->h2 i86->h2 i87->h2
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i88->h2 i89->h2 i90->h2 i91->h2 i92->h2 i93->h2 i94->h2 i95->h2
## 0.00 0.04 -0.05 0.00 0.00 0.00 0.00 0.00
## i96->h2 i97->h2 i98->h2 i99->h2 i100->h2 i101->h2 i102->h2 i103->h2
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i104->h2 i105->h2 i106->h2 i107->h2 i108->h2
## 0.00 0.00 0.03 0.00 0.00
## b->h3 i1->h3 i2->h3 i3->h3 i4->h3 i5->h3 i6->h3 i7->h3
## -2.39 0.01 0.42 0.28 -0.53 0.00 0.09 -0.75
## i8->h3 i9->h3 i10->h3 i11->h3 i12->h3 i13->h3 i14->h3 i15->h3
## -1.72 0.05 -0.22 0.00 -0.51 0.23 -0.65 -0.83
## i16->h3 i17->h3 i18->h3 i19->h3 i20->h3 i21->h3 i22->h3 i23->h3
## -0.23 -0.55 -0.68 0.50 -0.56 0.12 1.05 -0.10
## i24->h3 i25->h3 i26->h3 i27->h3 i28->h3 i29->h3 i30->h3 i31->h3
## -0.20 -0.15 -0.28 0.46 -0.01 -0.25 -0.85 0.42
## i32->h3 i33->h3 i34->h3 i35->h3 i36->h3 i37->h3 i38->h3 i39->h3
## -0.29 -0.04 -1.64 0.26 0.42 1.03 0.10 0.77
## i40->h3 i41->h3 i42->h3 i43->h3 i44->h3 i45->h3 i46->h3 i47->h3
## 1.02 0.22 0.19 0.71 -0.36 -0.03 0.59 -0.86
## i48->h3 i49->h3 i50->h3 i51->h3 i52->h3 i53->h3 i54->h3 i55->h3
## 1.08 -6.48 -0.78 -0.34 -0.64 -0.47 -0.02 -0.21
## i56->h3 i57->h3 i58->h3 i59->h3 i60->h3 i61->h3 i62->h3 i63->h3
## -0.70 -0.87 -1.22 0.17 -0.54 0.07 -0.27 -2.11
## i64->h3 i65->h3 i66->h3 i67->h3 i68->h3 i69->h3 i70->h3 i71->h3
## 0.00 0.00 0.01 0.27 -0.10 -0.10 -0.99 -0.36
## i72->h3 i73->h3 i74->h3 i75->h3 i76->h3 i77->h3 i78->h3 i79->h3
## 0.41 -0.12 0.21 0.26 0.47 1.02 -1.06 -0.88
## i80->h3 i81->h3 i82->h3 i83->h3 i84->h3 i85->h3 i86->h3 i87->h3
## -1.38 -0.96 0.00 0.07 0.00 -0.26 -0.67 0.63
## i88->h3 i89->h3 i90->h3 i91->h3 i92->h3 i93->h3 i94->h3 i95->h3
## 0.71 0.22 0.06 0.40 0.07 0.05 -0.03 -0.14
## i96->h3 i97->h3 i98->h3 i99->h3 i100->h3 i101->h3 i102->h3 i103->h3
## -0.03 1.53 0.55 -0.14 0.03 0.45 -1.29 -0.20
## i104->h3 i105->h3 i106->h3 i107->h3 i108->h3
## -0.07 0.21 -0.02 -1.10 -0.13
## b->o h1->o h2->o h3->o
## -0.50 -0.89 0.17 5.10
# Variable-importance plot of the 25 top-ranked predictors for the n3
# TDA-assisted neural network (title typo "Assited" corrected).
vip(Adult_TDA_PC_5.60.5_n3_NN1Fit0, num_features = 25) +
  ggtitle("Adult_TDA_PCA_5.60.5_n3_NN1Fit TDA-Assisted NN")

# Score the test data with the n3 TDA-assisted neural network trained above.
pred0 <- predict(
  Adult_TDA_PC_5.60.5_n3_NN1Fit0,
  newdata = adult.one_hot_df4Test
)
# Cross-tabulate predicted against observed classes to assess test-set performance.
ad_tda_pc_5.60.5_n3_nn1_cf0 <- confusionMatrix(
  data = pred0,
  reference = as.factor(adult.one_hot_df4Test$adult_df1)
)
print(ad_tda_pc_5.60.5_n3_nn1_cf0)
## Confusion Matrix and Statistics
##
## Reference
## Prediction <=50K >50K
## <=50K 6729 1815
## >50K 687 537
##
## Accuracy : 0.7439
## 95% CI : (0.7351, 0.7525)
## No Information Rate : 0.7592
## P-Value [Acc > NIR] : 0.9998
##
## Kappa : 0.1622
##
## Mcnemar's Test P-Value : <2e-16
##
## Sensitivity : 0.9074
## Specificity : 0.2283
## Pos Pred Value : 0.7876
## Neg Pred Value : 0.4387
## Prevalence : 0.7592
## Detection Rate : 0.6889
## Detection Prevalence : 0.8747
## Balanced Accuracy : 0.5678
##
## 'Positive' Class : <=50K
##
# Display the same confusion-matrix object once more.
print(ad_tda_pc_5.60.5_n3_nn1_cf0)
## Confusion Matrix and Statistics
##
## Reference
## Prediction <=50K >50K
## <=50K 6729 1815
## >50K 687 537
##
## Accuracy : 0.7439
## 95% CI : (0.7351, 0.7525)
## No Information Rate : 0.7592
## P-Value [Acc > NIR] : 0.9998
##
## Kappa : 0.1622
##
## Mcnemar's Test P-Value : <2e-16
##
## Sensitivity : 0.9074
## Specificity : 0.2283
## Pos Pred Value : 0.7876
## Neg Pred Value : 0.4387
## Prevalence : 0.7592
## Detection Rate : 0.6889
## Detection Prevalence : 0.8747
## Balanced Accuracy : 0.5678
##
## 'Positive' Class : <=50K
##
# Overall test-set statistics (accuracy, kappa, accuracy bounds, p-values).
ad_tda_pc_5.60.5_n3_nn1_cf0[["overall"]]
## Accuracy Kappa AccuracyLower AccuracyUpper AccuracyNull
## 7.438575e-01 1.622455e-01 7.350781e-01 7.524912e-01 7.592138e-01
## AccuracyPValue McnemarPValue
## 9.997987e-01 2.062967e-112
# Store the overall test-set accuracy. It is indexed by name ("Accuracy")
# instead of by position so the selection is self-describing.
ad_tda_pc_5.60.5_n3_nn1_cf0_ov_acc <- ad_tda_pc_5.60.5_n3_nn1_cf0$overall["Accuracy"]
# Per-class statistics (sensitivity, specificity, precision, recall, F1, ...).
ad_tda_pc_5.60.5_n3_nn1_cf0$byClass
## Sensitivity Specificity Pos Pred Value
## 0.9073625 0.2283163 0.7875702
## Neg Pred Value Precision Recall
## 0.4387255 0.7875702 0.9073625
## F1 Prevalence Detection Rate
## 0.8432331 0.7592138 0.6888821
## Detection Prevalence Balanced Accuracy
## 0.8746929 0.5678394
# Keep precision, recall and F1 from the per-class statistics. They are
# selected by name (positions 5:7 in the vector printed above) so the code
# does not rely on the ordering of `byClass`.
ad_tda_pc_5.60.5_n3_nn1_cf0_pre_rec_f1 <-
  ad_tda_pc_5.60.5_n3_nn1_cf0$byClass[c("Precision", "Recall", "F1")]
###### Conduct initial Bayesian tests of non-TDA-assisted NN vs. TDA-assisted NN classifiers
### 3-fold diff
# Row-by-row difference in cross-validated accuracy: `ad_nn1_fit_re`
# (defined elsewhere; presumably the non-TDA NN resamples) minus the
# n3 TDA-assisted NN resamples.
diff_tda_pca_5.60.5_nn1_n3_3_fold <-
  ad_nn1_fit_re - ad_tda_pc_5.60.5_n3_nn1_fit_re
diff_tda_pca_5.60.5_nn1_n3_3_fold
## Accuracy
## 1 0.02578927
## 2 0.06651431
## 3 0.02224114
## Bayesian tests on the 3-fold accuracy differences
# Bayesian sign test; -0.01 and 0.01 are passed positionally
# (presumably the lower and upper ROPE limits -- confirm against the helper).
bst_tda_pca_5.60.5_nn1.n3_3_fold <- BayesianSignTest(
  as.matrix(diff_tda_pca_5.60.5_nn1_n3_3_fold),
  -0.01,
  0.01
)
print(bst_tda_pca_5.60.5_nn1.n3_3_fold)
## $probLeft
## [1] 0
##
## $probRope
## [1] 0.25
##
## $probRight
## [1] 0.75
# Odds "left" from the Bayesian sign test: probLeft divided by probRight.
bst_tda_pca_5.60.5_nn1.n3_3_fold_odds.left <- with(
  bst_tda_pca_5.60.5_nn1.n3_3_fold,
  probLeft / probRight
)
print(bst_tda_pca_5.60.5_nn1.n3_3_fold_odds.left)
## [1] 0
# Bayesian signed-rank test on the 3-fold differences, called with the same
# -0.01 / 0.01 trailing arguments as the sign test.
bsr_tda_pca_5.60.5_nn1.n3_3_fold <- BayesianSignedRank(
  as.matrix(diff_tda_pca_5.60.5_nn1_n3_3_fold),
  -0.01,
  0.01
)
print(bsr_tda_pca_5.60.5_nn1.n3_3_fold)
## $winLeft
## [1] 0
##
## $winRope
## [1] 0.008233333
##
## $winRight
## [1] 0.9917667
# Bayesian correlated t-test on the 3-fold differences.
# NOTE(review): 0.1 is passed as the second positional argument (presumably the
# assumed correlation between folds), followed by -0.01 and 0.01 -- confirm
# against the helper's definition.
bct_tda_pca_5.60.5_nn1.n3_3_fold <- correlatedBayesianTtest(
  as.matrix(diff_tda_pca_5.60.5_nn1_n3_3_fold),
  0.1,
  -0.01,
  0.01
)
print(bct_tda_pca_5.60.5_nn1.n3_3_fold)
## $left
## [1] 0.04948247
##
## $rope
## [1] 0.06445571
##
## $right
## [1] 0.8860618
# ROPE plot of the 3-fold accuracy differences for the range [-0.01, 0.01].
plot(
  rope(diff_tda_pca_5.60.5_nn1_n3_3_fold, range = c(-0.01, 0.01))
)

# BayesFactor (disabled)
#bf_tda_pca_5.60.5_nn1.n3_3_fold = ttestBF(x = as.matrix(diff_tda_pca_5.60.5_nn1_n3_3_fold))
#bf_tda_pca_5.60.5_nn1.n3_3_fold
# One-sample t-test of the 3-fold accuracy differences against a mean of zero.
print(t.test(as.matrix(diff_tda_pca_5.60.5_nn1_n3_3_fold)))
##
## One Sample t-test
##
## data: as.matrix(diff_tda_pca_5.60.5_nn1_n3_3_fold)
## t = 2.6882, df = 2, p-value = 0.115
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
## -0.02293049 0.09929364
## sample estimates:
## mean of x
## 0.03818157
### Test set diff
# Difference in overall test-set accuracy: `nn1_cf_ov_acc` (defined elsewhere;
# presumably the non-TDA NN) minus the n3 TDA-assisted NN. This is one value.
diff_tda_pca_5.60.5_nn1.n3_test <-
  nn1_cf_ov_acc - ad_tda_pc_5.60.5_n3_nn1_cf0_ov_acc
print(diff_tda_pca_5.60.5_nn1.n3_test)
## Accuracy
## 0.06122031
## Bayesian tests on the test-set accuracy difference
# Bayesian sign test; the input here is the single test-set difference.
bst_tda_pca_5.60.5_nn1.n3_test <- BayesianSignTest(
  as.matrix(diff_tda_pca_5.60.5_nn1.n3_test),
  -0.01,
  0.01
)
print(bst_tda_pca_5.60.5_nn1.n3_test)
## $probLeft
## [1] 0
##
## $probRope
## [1] 0.5
##
## $probRight
## [1] 0.5
# Odds "left" from the test-set Bayesian sign test: probLeft divided by probRight.
bst_tda_pca_5.60.5_nn1.n3_test_odds.left <- with(
  bst_tda_pca_5.60.5_nn1.n3_test,
  probLeft / probRight
)
print(bst_tda_pca_5.60.5_nn1.n3_test_odds.left)
## [1] 0
# Bayesian signed-rank test on the single test-set accuracy difference.
bsr_tda_pca_5.60.5_nn1.n3_test <- BayesianSignedRank(
  as.matrix(diff_tda_pca_5.60.5_nn1.n3_test),
  -0.01,
  0.01
)
print(bsr_tda_pca_5.60.5_nn1.n3_test)
## $winLeft
## [1] 0
##
## $winRope
## [1] 0.1579333
##
## $winRight
## [1] 0.8420667
# Bayesian correlated t-test on the test-set accuracy difference.
# NOTE(review): the input is a single difference and the printed result is NA
# for left/rope/right -- presumably because a spread cannot be estimated from
# one observation; confirm whether this test is meaningful here.
bct_tda_pca_5.60.5_nn1.n3_test <- correlatedBayesianTtest(
  as.matrix(diff_tda_pca_5.60.5_nn1.n3_test),
  0.1,
  -0.01,
  0.01
)
print(bct_tda_pca_5.60.5_nn1.n3_test)
## $left
## [1] NA
##
## $rope
## [1] NA
##
## $right
## [1] NA
# Rope Plot (disabled)
#plot(rope(diff_tda_pca_5.60.5_nn1.n3_test))
# BayesFactor (disabled)
#bf_tda_pca_5.60.5_nn1.n3_test = ttestBF(x = as.matrix(diff_tda_pca_5.60.5_nn1.n3_test))
#bf_tda_pca_5.60.5_nn1.n3_test
# t-test (disabled)
#t.test(as.matrix(diff_tda_pca_5.60.5_nn1.n3_test))
## Node 4
# Neural Network 1: tune a caret "nnet" model on `tda.m_adult_5.60.5.n4.vec`,
# using the resampling setup in `fitControl` and the candidate grid `nn1Grid`,
# and select the final model by accuracy.
# NOTE(review): `Importance` (capital I) does not appear to be a documented
# argument of caret::train() or nnet::nnet(); it is presumably swallowed by
# `...` and has no effect -- confirm. It is kept (with TRUE instead of the
# reassignable shorthand T) so the call behaves exactly as before.
Adult_TDA_PC_5.60.5_n4_NN1Fit0 <- train(
  as.factor(adult_df1) ~ .,
  data = tda.m_adult_5.60.5.n4.vec,
  Importance = TRUE,
  method = "nnet",
  trControl = fitControl,
  tuneGrid = nn1Grid,
  metric = "Accuracy"
)
## # weights: 221
## initial value 8318.339118
## iter 10 value 3175.581436
## iter 20 value 3175.360580
## iter 30 value 3174.429923
## iter 40 value 3171.479766
## iter 50 value 3056.906775
## iter 60 value 2934.500898
## iter 70 value 2931.848123
## iter 80 value 2857.138368
## iter 90 value 2830.855223
## iter 100 value 2830.370222
## final value 2830.370222
## stopped after 100 iterations
## # weights: 331
## initial value 15390.601350
## iter 10 value 3174.760310
## iter 10 value 3174.760310
## iter 20 value 3170.884067
## iter 30 value 3052.498543
## iter 40 value 2841.762584
## iter 50 value 2794.548792
## iter 60 value 2715.456353
## iter 70 value 2296.174422
## iter 80 value 2198.156499
## iter 90 value 2144.129432
## iter 100 value 2121.102740
## final value 2121.102740
## stopped after 100 iterations
## # weights: 551
## initial value 9658.704641
## iter 10 value 3176.535145
## iter 20 value 2918.635260
## iter 30 value 2839.893924
## iter 40 value 2803.776171
## iter 50 value 2750.467655
## iter 60 value 2641.970727
## iter 70 value 2521.974159
## iter 80 value 2417.398638
## iter 90 value 2399.528725
## iter 100 value 2363.327757
## final value 2363.327757
## stopped after 100 iterations
## # weights: 771
## initial value 12242.422298
## iter 10 value 3043.960771
## iter 20 value 2852.756756
## iter 30 value 2833.307137
## iter 40 value 2788.859258
## iter 50 value 2702.237556
## iter 60 value 2658.085353
## iter 70 value 2483.361322
## iter 80 value 2251.760553
## iter 90 value 2177.425476
## iter 100 value 2081.021450
## final value 2081.021450
## stopped after 100 iterations
## # weights: 221
## initial value 15105.373403
## iter 10 value 2895.099002
## iter 20 value 2689.810873
## iter 30 value 2494.716724
## iter 40 value 2304.040442
## iter 50 value 2255.921195
## iter 60 value 2175.513796
## iter 70 value 2133.816325
## iter 80 value 2084.598545
## iter 90 value 2078.448811
## iter 100 value 2077.113637
## final value 2077.113637
## stopped after 100 iterations
## # weights: 331
## initial value 9296.093651
## iter 10 value 3167.254065
## iter 20 value 2540.379769
## iter 30 value 2369.209402
## iter 40 value 2276.078337
## iter 50 value 2230.444370
## iter 60 value 2219.921975
## iter 70 value 2219.799235
## iter 80 value 2217.108119
## iter 90 value 2190.455378
## iter 100 value 2173.735733
## final value 2173.735733
## stopped after 100 iterations
## # weights: 551
## initial value 16151.221545
## iter 10 value 3178.016338
## iter 20 value 3169.402100
## iter 30 value 2935.554822
## iter 40 value 2912.830716
## iter 50 value 2857.119881
## iter 60 value 2810.852484
## iter 70 value 2800.294422
## iter 80 value 2797.664262
## iter 90 value 2785.745682
## iter 100 value 2781.891518
## final value 2781.891518
## stopped after 100 iterations
## # weights: 771
## initial value 17160.026245
## iter 10 value 3161.407550
## iter 20 value 2841.917904
## iter 30 value 2826.289848
## iter 40 value 2825.701657
## iter 50 value 2818.239924
## iter 60 value 2800.902493
## iter 70 value 2763.108985
## iter 80 value 2742.545434
## iter 90 value 2734.106569
## iter 100 value 2733.009686
## final value 2733.009686
## stopped after 100 iterations
## # weights: 221
## initial value 6017.847720
## iter 10 value 3178.123962
## iter 20 value 2958.530357
## iter 30 value 2913.292009
## iter 40 value 2881.405020
## iter 50 value 2775.653759
## iter 60 value 2719.126217
## iter 70 value 2533.008117
## iter 80 value 2345.463392
## iter 90 value 2277.314579
## iter 100 value 2176.085326
## final value 2176.085326
## stopped after 100 iterations
## # weights: 331
## initial value 12784.462090
## iter 10 value 3179.418885
## iter 20 value 3176.307362
## iter 30 value 3155.161516
## iter 40 value 3049.985344
## iter 50 value 2872.314473
## iter 60 value 2858.110191
## iter 70 value 2851.779222
## iter 80 value 2833.410158
## iter 90 value 2798.397836
## iter 100 value 2768.991243
## final value 2768.991243
## stopped after 100 iterations
## # weights: 551
## initial value 10000.285372
## iter 10 value 3189.986250
## iter 20 value 3179.762001
## iter 30 value 3178.584500
## iter 40 value 3171.521727
## iter 50 value 2973.688849
## iter 60 value 2883.847192
## iter 70 value 2731.243394
## iter 80 value 2708.232404
## iter 90 value 2676.948190
## iter 100 value 2601.783663
## final value 2601.783663
## stopped after 100 iterations
## # weights: 771
## initial value 7840.135723
## iter 10 value 3178.812950
## iter 20 value 3176.865202
## iter 30 value 3140.193649
## iter 40 value 2866.297815
## iter 50 value 2509.877301
## iter 60 value 2483.573391
## iter 70 value 2449.888344
## iter 80 value 2334.564148
## iter 90 value 2185.115722
## iter 100 value 2125.047485
## final value 2125.047485
## stopped after 100 iterations
## # weights: 221
## initial value 8105.949359
## iter 10 value 3235.228114
## iter 20 value 3179.968775
## iter 30 value 3069.350213
## iter 40 value 2870.088133
## iter 50 value 2797.936901
## iter 60 value 2784.644070
## iter 70 value 2783.233884
## iter 80 value 2777.014607
## iter 90 value 2738.231626
## iter 100 value 2660.087816
## final value 2660.087816
## stopped after 100 iterations
## # weights: 331
## initial value 7502.482838
## iter 10 value 3089.435871
## iter 20 value 3002.803427
## iter 30 value 2887.575634
## iter 40 value 2883.128806
## iter 50 value 2882.363134
## final value 2880.314962
## converged
## # weights: 551
## initial value 11823.398416
## iter 10 value 3212.127255
## iter 20 value 3198.743240
## iter 30 value 3019.173431
## iter 40 value 2936.624044
## iter 50 value 2832.306624
## iter 60 value 2820.085942
## iter 70 value 2811.087432
## iter 80 value 2798.797901
## iter 90 value 2769.774000
## iter 100 value 2607.251160
## final value 2607.251160
## stopped after 100 iterations
## # weights: 771
## initial value 5070.332763
## iter 10 value 2975.954881
## iter 20 value 2908.688311
## iter 30 value 2835.763453
## iter 40 value 2796.195464
## iter 50 value 2780.114996
## iter 60 value 2778.225304
## iter 70 value 2769.331775
## iter 80 value 2751.269880
## iter 90 value 2647.939916
## iter 100 value 2323.116948
## final value 2323.116948
## stopped after 100 iterations
## # weights: 221
## initial value 8356.841771
## iter 10 value 3180.975967
## iter 20 value 3180.421445
## iter 30 value 3177.244877
## iter 40 value 3026.800370
## iter 50 value 2906.900367
## iter 60 value 2895.611903
## iter 70 value 2882.824709
## iter 80 value 2882.041906
## iter 90 value 2879.461717
## iter 100 value 2870.999373
## final value 2870.999373
## stopped after 100 iterations
## # weights: 331
## initial value 12457.404844
## final value 3177.249126
## converged
## # weights: 551
## initial value 14689.373225
## iter 10 value 3185.663849
## iter 20 value 3156.742596
## iter 30 value 3155.369796
## iter 40 value 3152.555535
## iter 50 value 3141.118838
## iter 60 value 2908.799060
## iter 70 value 2757.910407
## iter 80 value 2553.488276
## iter 90 value 2370.294970
## iter 100 value 2331.520141
## final value 2331.520141
## stopped after 100 iterations
## # weights: 771
## initial value 8829.016112
## iter 10 value 3184.534095
## iter 20 value 3176.916576
## iter 30 value 3001.258611
## iter 40 value 2932.607976
## iter 50 value 2924.030669
## iter 60 value 2906.695369
## iter 70 value 2903.722815
## iter 80 value 2899.072467
## iter 90 value 2894.338672
## iter 100 value 2861.181597
## final value 2861.181597
## stopped after 100 iterations
## # weights: 221
## initial value 7228.136496
## iter 10 value 3083.664829
## iter 20 value 2905.775811
## iter 30 value 2903.209710
## iter 40 value 2901.545066
## iter 50 value 2863.284556
## iter 60 value 2812.497219
## iter 70 value 2796.960244
## iter 80 value 2766.297980
## iter 90 value 2718.071421
## iter 100 value 2529.017575
## final value 2529.017575
## stopped after 100 iterations
## # weights: 331
## initial value 10313.968089
## iter 10 value 3178.245814
## iter 20 value 3177.728970
## iter 30 value 3031.266190
## iter 40 value 2937.636917
## iter 50 value 2916.811479
## iter 60 value 2911.477572
## iter 70 value 2884.894518
## iter 80 value 2878.030742
## iter 90 value 2867.386363
## iter 100 value 2850.511592
## final value 2850.511592
## stopped after 100 iterations
## # weights: 551
## initial value 7843.657469
## iter 10 value 3242.403780
## iter 20 value 2950.731700
## iter 30 value 2878.574797
## iter 40 value 2840.009417
## iter 50 value 2824.447684
## iter 60 value 2802.561308
## iter 70 value 2767.344175
## iter 80 value 2744.200776
## iter 90 value 2688.569678
## iter 100 value 2637.296858
## final value 2637.296858
## stopped after 100 iterations
## # weights: 771
## initial value 9633.952674
## iter 10 value 3122.570308
## iter 20 value 2901.060055
## iter 30 value 2821.926566
## iter 40 value 2793.341837
## iter 50 value 2778.682667
## iter 60 value 2771.594887
## iter 70 value 2769.762496
## iter 70 value 2769.762473
## iter 80 value 2763.547982
## iter 90 value 2747.044167
## iter 100 value 2744.803651
## final value 2744.803651
## stopped after 100 iterations
## # weights: 221
## initial value 5051.137548
## iter 10 value 3174.497014
## iter 20 value 3174.397090
## iter 30 value 3146.885113
## iter 40 value 2929.660496
## iter 50 value 2919.480578
## iter 60 value 2868.733792
## iter 70 value 2855.700818
## iter 80 value 2848.870322
## iter 90 value 2830.271425
## iter 100 value 2816.466603
## final value 2816.466603
## stopped after 100 iterations
## # weights: 331
## initial value 5683.463752
## iter 10 value 3171.628339
## iter 20 value 2933.787291
## iter 30 value 2919.087869
## iter 40 value 2879.387645
## iter 50 value 2864.848373
## iter 60 value 2817.770655
## iter 70 value 2668.270024
## iter 80 value 2493.519074
## iter 90 value 2352.533083
## iter 100 value 2332.743505
## final value 2332.743505
## stopped after 100 iterations
## # weights: 551
## initial value 10375.938585
## iter 10 value 3176.765068
## iter 20 value 3013.188793
## iter 30 value 2930.148624
## iter 40 value 2897.274538
## iter 50 value 2887.512488
## iter 60 value 2861.701235
## iter 70 value 2842.146603
## iter 80 value 2755.422422
## iter 90 value 2731.539588
## iter 100 value 2729.308090
## final value 2729.308090
## stopped after 100 iterations
## # weights: 771
## initial value 13152.127600
## iter 10 value 3175.181735
## iter 20 value 2956.604572
## iter 30 value 2888.759612
## iter 40 value 2858.578193
## iter 50 value 2803.187673
## iter 60 value 2773.853296
## iter 70 value 2764.001758
## iter 80 value 2716.635155
## iter 90 value 2701.596286
## iter 100 value 2671.911237
## final value 2671.911237
## stopped after 100 iterations
## # weights: 221
## initial value 8288.788797
## iter 10 value 2975.175537
## iter 20 value 2775.725368
## iter 30 value 2763.784332
## iter 40 value 2761.376335
## iter 50 value 2760.602432
## iter 60 value 2683.496462
## iter 70 value 2507.431424
## iter 80 value 2464.144798
## iter 90 value 2288.473856
## iter 100 value 2283.575832
## final value 2283.575832
## stopped after 100 iterations
## # weights: 331
## initial value 9671.848902
## iter 10 value 2859.688135
## iter 20 value 2504.905439
## iter 30 value 2262.597822
## iter 40 value 2165.589678
## iter 50 value 2081.862368
## iter 60 value 2042.704581
## iter 70 value 1991.618373
## iter 80 value 1967.476807
## iter 90 value 1950.975698
## iter 100 value 1938.953919
## final value 1938.953919
## stopped after 100 iterations
## # weights: 551
## initial value 12638.603501
## iter 10 value 3184.338278
## iter 20 value 3122.185013
## iter 30 value 2921.536165
## iter 40 value 2912.406274
## iter 50 value 2908.742837
## iter 60 value 2906.500470
## iter 70 value 2880.582691
## iter 80 value 2860.134232
## iter 90 value 2848.552410
## iter 100 value 2841.594289
## final value 2841.594289
## stopped after 100 iterations
## # weights: 771
## initial value 8600.016886
## iter 10 value 3181.684562
## iter 20 value 3174.019852
## iter 30 value 3083.813432
## iter 40 value 2931.820085
## iter 50 value 2906.693364
## iter 60 value 2899.600727
## iter 70 value 2893.957575
## iter 80 value 2888.429971
## iter 90 value 2844.705290
## iter 100 value 2814.751672
## final value 2814.751672
## stopped after 100 iterations
## # weights: 221
## initial value 15192.729718
## iter 10 value 3184.035105
## iter 20 value 3182.922596
## iter 30 value 3179.499739
## iter 40 value 2921.662689
## iter 50 value 2865.086428
## iter 60 value 2850.217446
## iter 70 value 2836.951134
## iter 80 value 2818.399194
## iter 90 value 2777.562984
## iter 100 value 2733.174646
## final value 2733.174646
## stopped after 100 iterations
## # weights: 331
## initial value 17721.307928
## iter 10 value 3525.287185
## iter 20 value 3030.598398
## iter 30 value 2917.254479
## iter 40 value 2911.360417
## iter 50 value 2906.024953
## iter 60 value 2829.333868
## iter 70 value 2499.236845
## iter 80 value 2258.051548
## iter 90 value 2196.426328
## iter 100 value 2144.648359
## final value 2144.648359
## stopped after 100 iterations
## # weights: 551
## initial value 6791.879437
## iter 10 value 3175.017748
## iter 20 value 3064.812839
## iter 30 value 2930.537692
## iter 40 value 2806.167919
## iter 50 value 2642.438606
## iter 60 value 2355.564654
## iter 70 value 2217.849553
## iter 80 value 2143.030954
## iter 90 value 2133.078034
## iter 100 value 2085.849348
## final value 2085.849348
## stopped after 100 iterations
## # weights: 771
## initial value 15145.167293
## iter 10 value 3063.253436
## iter 20 value 2895.358504
## iter 30 value 2862.551201
## iter 40 value 2815.809818
## iter 50 value 2758.755841
## iter 60 value 2741.044779
## iter 70 value 2722.810868
## iter 80 value 2714.959606
## iter 90 value 2706.036479
## iter 100 value 2559.798981
## final value 2559.798981
## stopped after 100 iterations
## # weights: 551
## initial value 15761.835171
## iter 10 value 4767.064662
## iter 20 value 4735.684140
## iter 30 value 4727.024070
## iter 40 value 4623.855125
## iter 50 value 4113.938595
## iter 60 value 3629.048390
## iter 70 value 3580.696235
## iter 80 value 3564.600301
## iter 90 value 3528.658234
## iter 100 value 3509.068680
## final value 3509.068680
## stopped after 100 iterations
# Display the resampling/tuning summary of the n4 TDA-assisted neural network fit.
print(Adult_TDA_PC_5.60.5_n4_NN1Fit0)
## Neural Network
##
## 19829 samples
## 108 predictor
## 2 classes: ' <=50K', ' >50K'
##
## No pre-processing
## Resampling: Cross-Validated (3 fold)
## Summary of sample sizes: 13219, 13220, 13219
## Resampling results across tuning parameters:
##
## size decay Accuracy Kappa
## 2 0.3 0.9435677 0.2827854
## 2 0.5 0.9426092 0.3250682
## 2 0.7 0.9419535 0.2972836
## 3 0.3 0.9414998 0.3058963
## 3 0.5 0.9407431 0.2240087
## 3 0.7 0.9413485 0.2886483
## 5 0.3 0.9455848 0.3153222
## 5 0.5 0.9430633 0.2625668
## 5 0.7 0.9421556 0.3202957
## 7 0.3 0.9448787 0.3188726
## 7 0.5 0.9433657 0.2950004
## 7 0.7 0.9440720 0.2998438
##
## Accuracy was used to select the optimal model using the largest value.
## The final values used for the model were size = 5 and decay = 0.3.
# Per-fold Accuracy and Kappa of the selected n4 model.
Adult_TDA_PC_5.60.5_n4_NN1Fit0[["resample"]]
## Accuracy Kappa Resample
## 1 0.9426626 0.3097635 Fold1
## 2 0.9473525 0.3290017 Fold3
## 3 0.9467393 0.3072013 Fold2
# Keep the per-fold accuracy of the selected n4 model as a one-column data frame.
# The column is selected by name rather than by position so the intent is
# explicit and the code does not depend on the column order of `resample`.
# NOTE(review): the rows are in the order caret returned them (printed above as
# Fold1, Fold3, Fold2), not in fold order -- confirm they line up with any
# other resample results they are compared with row by row.
ad_tda_pc_5.60.5_n4_nn1_fit_re <- Adult_TDA_PC_5.60.5_n4_NN1Fit0$resample["Accuracy"]
# Print the architecture and fitted weights of the final network.
summary(Adult_TDA_PC_5.60.5_n4_NN1Fit0)
## a 108-5-1 network with 551 weights
## options were - entropy fitting decay=0.3
## b->h1 i1->h1 i2->h1 i3->h1 i4->h1 i5->h1 i6->h1 i7->h1
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i8->h1 i9->h1 i10->h1 i11->h1 i12->h1 i13->h1 i14->h1 i15->h1
## 0.00 0.00 0.00 -0.12 0.00 0.00 0.00 0.00
## i16->h1 i17->h1 i18->h1 i19->h1 i20->h1 i21->h1 i22->h1 i23->h1
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i24->h1 i25->h1 i26->h1 i27->h1 i28->h1 i29->h1 i30->h1 i31->h1
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i32->h1 i33->h1 i34->h1 i35->h1 i36->h1 i37->h1 i38->h1 i39->h1
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i40->h1 i41->h1 i42->h1 i43->h1 i44->h1 i45->h1 i46->h1 i47->h1
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i48->h1 i49->h1 i50->h1 i51->h1 i52->h1 i53->h1 i54->h1 i55->h1
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i56->h1 i57->h1 i58->h1 i59->h1 i60->h1 i61->h1 i62->h1 i63->h1
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i64->h1 i65->h1 i66->h1 i67->h1 i68->h1 i69->h1 i70->h1 i71->h1
## 0.01 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i72->h1 i73->h1 i74->h1 i75->h1 i76->h1 i77->h1 i78->h1 i79->h1
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i80->h1 i81->h1 i82->h1 i83->h1 i84->h1 i85->h1 i86->h1 i87->h1
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i88->h1 i89->h1 i90->h1 i91->h1 i92->h1 i93->h1 i94->h1 i95->h1
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i96->h1 i97->h1 i98->h1 i99->h1 i100->h1 i101->h1 i102->h1 i103->h1
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i104->h1 i105->h1 i106->h1 i107->h1 i108->h1
## 0.00 0.00 0.00 0.00 0.00
## b->h2 i1->h2 i2->h2 i3->h2 i4->h2 i5->h2 i6->h2 i7->h2
## 5.60 -0.02 1.27 -0.93 3.15 -0.01 0.69 -0.04
## i8->h2 i9->h2 i10->h2 i11->h2 i12->h2 i13->h2 i14->h2 i15->h2
## -0.70 2.06 0.11 0.00 2.03 -0.43 0.13 1.15
## i16->h2 i17->h2 i18->h2 i19->h2 i20->h2 i21->h2 i22->h2 i23->h2
## 0.72 1.07 0.53 0.43 2.21 -1.04 -1.71 2.42
## i24->h2 i25->h2 i26->h2 i27->h2 i28->h2 i29->h2 i30->h2 i31->h2
## -1.20 0.22 -1.20 0.28 0.07 0.35 -0.09 -0.59
## i32->h2 i33->h2 i34->h2 i35->h2 i36->h2 i37->h2 i38->h2 i39->h2
## -0.16 2.99 2.69 0.41 1.26 1.99 0.17 4.74
## i40->h2 i41->h2 i42->h2 i43->h2 i44->h2 i45->h2 i46->h2 i47->h2
## -1.78 2.48 1.36 2.39 2.13 0.61 -3.78 -2.73
## i48->h2 i49->h2 i50->h2 i51->h2 i52->h2 i53->h2 i54->h2 i55->h2
## -1.23 -1.90 -0.12 11.45 -0.83 1.69 3.80 0.84
## i56->h2 i57->h2 i58->h2 i59->h2 i60->h2 i61->h2 i62->h2 i63->h2
## -11.36 0.81 1.17 1.99 0.61 1.01 2.43 3.17
## i64->h2 i65->h2 i66->h2 i67->h2 i68->h2 i69->h2 i70->h2 i71->h2
## 0.00 -0.01 -0.24 0.32 0.03 0.58 0.12 -0.05
## i72->h2 i73->h2 i74->h2 i75->h2 i76->h2 i77->h2 i78->h2 i79->h2
## 0.81 0.47 -0.51 0.14 -0.15 0.16 0.43 0.01
## i80->h2 i81->h2 i82->h2 i83->h2 i84->h2 i85->h2 i86->h2 i87->h2
## -0.71 0.68 0.00 0.30 0.09 -0.01 0.61 0.32
## i88->h2 i89->h2 i90->h2 i91->h2 i92->h2 i93->h2 i94->h2 i95->h2
## -0.46 -1.44 0.46 -0.14 -0.07 1.84 0.36 0.13
## i96->h2 i97->h2 i98->h2 i99->h2 i100->h2 i101->h2 i102->h2 i103->h2
## 0.26 0.22 0.03 0.23 0.02 0.07 0.34 0.32
## i104->h2 i105->h2 i106->h2 i107->h2 i108->h2
## -0.02 0.05 -0.56 0.22 0.10
## b->h3 i1->h3 i2->h3 i3->h3 i4->h3 i5->h3 i6->h3 i7->h3
## 0.00 -0.05 0.00 0.00 0.00 0.00 0.00 0.00
## i8->h3 i9->h3 i10->h3 i11->h3 i12->h3 i13->h3 i14->h3 i15->h3
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i16->h3 i17->h3 i18->h3 i19->h3 i20->h3 i21->h3 i22->h3 i23->h3
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i24->h3 i25->h3 i26->h3 i27->h3 i28->h3 i29->h3 i30->h3 i31->h3
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i32->h3 i33->h3 i34->h3 i35->h3 i36->h3 i37->h3 i38->h3 i39->h3
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i40->h3 i41->h3 i42->h3 i43->h3 i44->h3 i45->h3 i46->h3 i47->h3
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i48->h3 i49->h3 i50->h3 i51->h3 i52->h3 i53->h3 i54->h3 i55->h3
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i56->h3 i57->h3 i58->h3 i59->h3 i60->h3 i61->h3 i62->h3 i63->h3
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i64->h3 i65->h3 i66->h3 i67->h3 i68->h3 i69->h3 i70->h3 i71->h3
## 0.01 -0.02 -0.03 0.00 0.00 0.00 0.00 0.00
## i72->h3 i73->h3 i74->h3 i75->h3 i76->h3 i77->h3 i78->h3 i79->h3
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i80->h3 i81->h3 i82->h3 i83->h3 i84->h3 i85->h3 i86->h3 i87->h3
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i88->h3 i89->h3 i90->h3 i91->h3 i92->h3 i93->h3 i94->h3 i95->h3
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i96->h3 i97->h3 i98->h3 i99->h3 i100->h3 i101->h3 i102->h3 i103->h3
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i104->h3 i105->h3 i106->h3 i107->h3 i108->h3
## 0.00 0.00 0.00 0.00 0.00
## b->h4 i1->h4 i2->h4 i3->h4 i4->h4 i5->h4 i6->h4 i7->h4
## 1.68 -0.03 -0.18 0.02 -0.51 0.07 0.60 1.01
## i8->h4 i9->h4 i10->h4 i11->h4 i12->h4 i13->h4 i14->h4 i15->h4
## 0.57 -0.04 0.15 0.00 0.63 1.09 0.69 0.16
## i16->h4 i17->h4 i18->h4 i19->h4 i20->h4 i21->h4 i22->h4 i23->h4
## -0.14 0.34 -0.31 -0.39 -0.09 -0.14 -1.81 0.34
## i24->h4 i25->h4 i26->h4 i27->h4 i28->h4 i29->h4 i30->h4 i31->h4
## 0.01 0.17 0.94 0.20 -0.42 1.81 -0.26 -2.56
## i32->h4 i33->h4 i34->h4 i35->h4 i36->h4 i37->h4 i38->h4 i39->h4
## 0.72 0.74 1.17 0.05 -0.11 -0.88 0.05 -3.26
## i40->h4 i41->h4 i42->h4 i43->h4 i44->h4 i45->h4 i46->h4 i47->h4
## -0.34 -0.22 0.53 1.42 0.94 0.35 1.91 0.61
## i48->h4 i49->h4 i50->h4 i51->h4 i52->h4 i53->h4 i54->h4 i55->h4
## -0.06 -0.29 1.02 -3.27 1.79 1.46 1.45 0.14
## i56->h4 i57->h4 i58->h4 i59->h4 i60->h4 i61->h4 i62->h4 i63->h4
## 0.11 0.62 -0.56 0.06 0.53 1.03 2.54 -0.86
## i64->h4 i65->h4 i66->h4 i67->h4 i68->h4 i69->h4 i70->h4 i71->h4
## 0.00 0.01 -0.04 -0.29 0.00 0.24 0.06 0.25
## i72->h4 i73->h4 i74->h4 i75->h4 i76->h4 i77->h4 i78->h4 i79->h4
## 0.31 0.17 0.07 0.09 -0.17 0.31 0.66 0.20
## i80->h4 i81->h4 i82->h4 i83->h4 i84->h4 i85->h4 i86->h4 i87->h4
## -0.48 0.06 -0.01 0.05 0.03 -0.02 -0.13 0.26
## i88->h4 i89->h4 i90->h4 i91->h4 i92->h4 i93->h4 i94->h4 i95->h4
## -0.11 -0.09 -0.07 -0.25 -0.03 0.33 0.09 0.12
## i96->h4 i97->h4 i98->h4 i99->h4 i100->h4 i101->h4 i102->h4 i103->h4
## 0.10 -0.39 0.20 -0.07 0.13 0.03 -0.12 0.29
## i104->h4 i105->h4 i106->h4 i107->h4 i108->h4
## 0.04 0.06 0.08 -0.11 -0.20
## b->h5 i1->h5 i2->h5 i3->h5 i4->h5 i5->h5 i6->h5 i7->h5
## 0.64 -1.03 -0.01 -0.11 0.69 0.01 -0.22 0.12
## i8->h5 i9->h5 i10->h5 i11->h5 i12->h5 i13->h5 i14->h5 i15->h5
## 0.15 0.02 0.00 0.00 0.27 0.12 0.10 0.02
## i16->h5 i17->h5 i18->h5 i19->h5 i20->h5 i21->h5 i22->h5 i23->h5
## 0.06 -0.02 0.11 0.24 0.16 -0.17 -0.20 0.78
## i24->h5 i25->h5 i26->h5 i27->h5 i28->h5 i29->h5 i30->h5 i31->h5
## -0.17 0.00 -0.41 -0.24 -0.81 0.07 0.00 -0.29
## i32->h5 i33->h5 i34->h5 i35->h5 i36->h5 i37->h5 i38->h5 i39->h5
## 0.12 0.67 0.13 -0.06 0.00 -0.51 0.01 0.02
## i40->h5 i41->h5 i42->h5 i43->h5 i44->h5 i45->h5 i46->h5 i47->h5
## 0.36 0.20 0.32 0.26 -0.01 0.01 0.44 0.07
## i48->h5 i49->h5 i50->h5 i51->h5 i52->h5 i53->h5 i54->h5 i55->h5
## -0.80 0.13 0.15 1.28 0.11 0.33 0.63 -0.07
## i56->h5 i57->h5 i58->h5 i59->h5 i60->h5 i61->h5 i62->h5 i63->h5
## -1.64 -0.04 0.36 0.05 0.13 0.13 -1.24 1.87
## i64->h5 i65->h5 i66->h5 i67->h5 i68->h5 i69->h5 i70->h5 i71->h5
## 0.00 -0.08 0.91 -0.27 0.00 -0.05 0.03 -0.14
## i72->h5 i73->h5 i74->h5 i75->h5 i76->h5 i77->h5 i78->h5 i79->h5
## -0.03 -0.01 -0.01 0.05 -0.03 -0.02 0.19 -0.05
## i80->h5 i81->h5 i82->h5 i83->h5 i84->h5 i85->h5 i86->h5 i87->h5
## 0.02 0.01 0.00 0.00 0.01 0.00 0.01 0.02
## i88->h5 i89->h5 i90->h5 i91->h5 i92->h5 i93->h5 i94->h5 i95->h5
## 0.05 0.03 -0.01 0.08 0.01 0.19 0.02 0.02
## i96->h5 i97->h5 i98->h5 i99->h5 i100->h5 i101->h5 i102->h5 i103->h5
## 0.01 0.05 -0.13 0.06 0.05 0.04 0.09 -0.02
## i104->h5 i105->h5 i106->h5 i107->h5 i108->h5
## 0.02 0.02 0.32 0.02 0.00
## b->o h1->o h2->o h3->o h4->o h5->o
## -0.89 -1.17 -3.12 0.56 -1.20 -0.84
# Variable-importance plot limited to 25 features for the node-4 network.
vip(Adult_TDA_PC_5.60.5_n4_NN1Fit0, num_features = 25) +
  ggtitle("Adult_TDA_PCA_5.60.5_n4_NN1Fit TDA-Assisted NN")

# Predict the outcome for the held-out test data with the node-4 model.
pred0 <- predict(Adult_TDA_PC_5.60.5_n4_NN1Fit0, newdata = adult.one_hot_df4Test)
# Cross-tabulate predictions against the observed test labels.
# No `positive` class is passed, so the per-class statistics refer to the
# class reported as 'Positive' in the printed output (<=50K).
ad_tda_pc_5.60.5_n4_nn1_cf0 <- confusionMatrix(
  data = pred0,
  reference = as.factor(adult.one_hot_df4Test$adult_df1)
)
ad_tda_pc_5.60.5_n4_nn1_cf0
## Confusion Matrix and Statistics
##
## Reference
## Prediction <=50K >50K
## <=50K 7416 2352
## >50K 0 0
##
## Accuracy : 0.7592
## 95% CI : (0.7506, 0.7677)
## No Information Rate : 0.7592
## P-Value [Acc > NIR] : 0.5055
##
## Kappa : 0
##
## Mcnemar's Test P-Value : <2e-16
##
## Sensitivity : 1.0000
## Specificity : 0.0000
## Pos Pred Value : 0.7592
## Neg Pred Value : NaN
## Prevalence : 0.7592
## Detection Rate : 0.7592
## Detection Prevalence : 1.0000
## Balanced Accuracy : 0.5000
##
## 'Positive' Class : <=50K
##
# Repeat print of the node-4 confusion matrix object created above; nothing is
# recomputed here.
ad_tda_pc_5.60.5_n4_nn1_cf0
## Confusion Matrix and Statistics
##
## Reference
## Prediction <=50K >50K
## <=50K 7416 2352
## >50K 0 0
##
## Accuracy : 0.7592
## 95% CI : (0.7506, 0.7677)
## No Information Rate : 0.7592
## P-Value [Acc > NIR] : 0.5055
##
## Kappa : 0
##
## Mcnemar's Test P-Value : <2e-16
##
## Sensitivity : 1.0000
## Specificity : 0.0000
## Pos Pred Value : 0.7592
## Neg Pred Value : NaN
## Prevalence : 0.7592
## Detection Rate : 0.7592
## Detection Prevalence : 1.0000
## Balanced Accuracy : 0.5000
##
## 'Positive' Class : <=50K
##
# Overall test-set statistics (accuracy, kappa, accuracy bounds, p-values).
ad_tda_pc_5.60.5_n4_nn1_cf0[["overall"]]
## Accuracy Kappa AccuracyLower AccuracyUpper AccuracyNull
## 0.7592138 0.0000000 0.7506071 0.7676657 0.7592138
## AccuracyPValue McnemarPValue
## 0.5055358 0.0000000
# Store the test accuracy as a named scalar for the test-set comparison below.
ad_tda_pc_5.60.5_n4_nn1_cf0_ov_acc <-
  ad_tda_pc_5.60.5_n4_nn1_cf0[["overall"]]["Accuracy"]
# Per-class statistics of the confusion matrix.
ad_tda_pc_5.60.5_n4_nn1_cf0[["byClass"]]
## Sensitivity Specificity Pos Pred Value
## 1.0000000 0.0000000 0.7592138
## Neg Pred Value Precision Recall
## NaN 0.7592138 1.0000000
## F1 Prevalence Detection Rate
## 0.8631285 0.7592138 0.7592138
## Detection Prevalence Balanced Accuracy
## 1.0000000 0.5000000
# Keep precision, recall and F1 from the per-class statistics.
ad_tda_pc_5.60.5_n4_nn1_cf0_pre_rec_f1 <-
  ad_tda_pc_5.60.5_n4_nn1_cf0$byClass[c("Precision", "Recall", "F1")]
###### Bayesian comparison of the NN classifier vs. the TDA-assisted NN classifier (node 4)
### 3-fold diff
# Row-wise difference of fold accuracies: ad_nn1_fit_re minus the node-4 TDA
# model, so negative values mean the TDA-assisted fit scored higher.
# ad_nn1_fit_re is defined outside this section - presumably the fold accuracies
# of the NN trained without TDA; confirm.
diff_tda_pca_5.60.5_nn1_n4_3_fold <- ad_nn1_fit_re - ad_tda_pc_5.60.5_n4_nn1_fit_re
diff_tda_pca_5.60.5_nn1_n4_3_fold
## Accuracy
## 1 -0.13471213
## 2 -0.09726037
## 3 -0.14534419
## Bayesian tests on the 3-fold differences
# Bayesian sign test. -0.01 and 0.01 are presumably the lower and upper ROPE
# limits - confirm against the BayesianSignTest definition.
bst_tda_pca_5.60.5_nn1.n4_3_fold <- BayesianSignTest(
  as.matrix(diff_tda_pca_5.60.5_nn1_n4_3_fold),
  -0.01,
  0.01
)
bst_tda_pca_5.60.5_nn1.n4_3_fold
## $probLeft
## [1] 0.75
##
## $probRope
## [1] 0.25
##
## $probRight
## [1] 0
# Odds left from the sign test: probLeft / probRight.
# The ratio is Inf when probRight is 0, as in the result printed below.
bst_tda_pca_5.60.5_nn1.n4_3_fold_odds.left <- with(
  bst_tda_pca_5.60.5_nn1.n4_3_fold,
  probLeft / probRight
)
bst_tda_pca_5.60.5_nn1.n4_3_fold_odds.left
## [1] Inf
# Bayesian signed-rank test on the same fold differences, with the same two
# bounds as the sign test.
bsr_tda_pca_5.60.5_nn1.n4_3_fold <- BayesianSignedRank(
  as.matrix(diff_tda_pca_5.60.5_nn1_n4_3_fold),
  -0.01,
  0.01
)
bsr_tda_pca_5.60.5_nn1.n4_3_fold
## $winLeft
## [1] 0.9905333
##
## $winRope
## [1] 0.009466667
##
## $winRight
## [1] 0
# Bayesian correlated t-test on the fold differences.
# NOTE(review): 0.1 is presumably the assumed correlation between folds and
# -0.01 / 0.01 the ROPE limits - confirm against correlatedBayesianTtest.
bct_tda_pca_5.60.5_nn1.n4_3_fold <- correlatedBayesianTtest(
  as.matrix(diff_tda_pca_5.60.5_nn1_n4_3_fold),
  0.1,
  -0.01,
  0.01
)
bct_tda_pca_5.60.5_nn1.n4_3_fold
## $left
## [1] 0.9897471
##
## $rope
## [1] 0.002735418
##
## $right
## [1] 0.007517514
# Rope Plot
# Plots the rope() result for the node-4 fold differences, passing
# c(-0.01, 0.01) as the second argument.
plot(rope(diff_tda_pca_5.60.5_nn1_n4_3_fold,c(-0.01,0.01)))

#BayesFactor
# The Bayes-factor t-test below is left disabled in the source.
#bf_tda_pca_5.60.5_nn1.n4_3_fold = ttestBF(x = as.matrix(diff_tda_pca_5.60.5_nn1_n4_3_fold))
#bf_tda_pca_5.60.5_nn1.n4_3_fold
#t_test
# One-sample, two-sided t-test of the fold differences against a mean of 0.
# The call is kept verbatim because t.test() echoes the argument expression in
# the "data:" line of its printout.
t.test(as.matrix(diff_tda_pca_5.60.5_nn1_n4_3_fold))
##
## One Sample t-test
##
## data: as.matrix(diff_tda_pca_5.60.5_nn1_n4_3_fold)
## t = -8.6248, df = 2, p-value = 0.01318
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
## -0.18851600 -0.06302845
## sample estimates:
## mean of x
## -0.1257722
### Test set diff
# Single test-set accuracy difference: nn1_cf_ov_acc minus the node-4 TDA model.
# nn1_cf_ov_acc is defined outside this section - presumably the test accuracy
# of the NN trained without TDA; confirm.
diff_tda_pca_5.60.5_nn1.n4_test <- nn1_cf_ov_acc - ad_tda_pc_5.60.5_n4_nn1_cf0_ov_acc
diff_tda_pca_5.60.5_nn1.n4_test
## Accuracy
## 0.04586405
## Bayesian tests on the test-set difference
# Bayesian sign test on the one-element difference, same bounds as for the folds.
bst_tda_pca_5.60.5_nn1.n4_test <- BayesianSignTest(
  as.matrix(diff_tda_pca_5.60.5_nn1.n4_test),
  -0.01,
  0.01
)
bst_tda_pca_5.60.5_nn1.n4_test
## $probLeft
## [1] 0
##
## $probRope
## [1] 0.5
##
## $probRight
## [1] 0.5
# Odds left from the test-set sign test: probLeft / probRight.
bst_tda_pca_5.60.5_nn1.n4_test_odds.left <- with(
  bst_tda_pca_5.60.5_nn1.n4_test,
  probLeft / probRight
)
bst_tda_pca_5.60.5_nn1.n4_test_odds.left
## [1] 0
# Bayesian signed-rank test on the single test-set difference.
bsr_tda_pca_5.60.5_nn1.n4_test <- BayesianSignedRank(
  as.matrix(diff_tda_pca_5.60.5_nn1.n4_test),
  -0.01,
  0.01
)
bsr_tda_pca_5.60.5_nn1.n4_test
## $winLeft
## [1] 0
##
## $winRope
## [1] 0.1614667
##
## $winRight
## [1] 0.8385333
# Bayesian correlated t-test on the single test-set difference.
# The printed result is NA for left, rope and right.
# NOTE(review): presumably because one value has no spread - confirm.
bct_tda_pca_5.60.5_nn1.n4_test <- correlatedBayesianTtest(
  as.matrix(diff_tda_pca_5.60.5_nn1.n4_test),
  0.1,
  -0.01,
  0.01
)
bct_tda_pca_5.60.5_nn1.n4_test
## $left
## [1] NA
##
## $rope
## [1] NA
##
## $right
## [1] NA
# Rope Plot (disabled for the single test-set difference)
#plot(rope(diff_tda_pca_5.60.5_nn1.n4_test))
#BayesFactor (disabled)
#bf_tda_pca_5.60.5_nn1.n4_test = ttestBF(x = as.matrix(diff_tda_pca_5.60.5_nn1.n4_test))
#bf_tda_pca_5.60.5_nn1.n4_test
#t_test (disabled)
#t.test(as.matrix(diff_tda_pca_5.60.5_nn1.n4_test))
## Node 5
# Neural Network 1: tune an nnet classifier for adult_df1 on the node-5 data
# (tda.m_adult_5.60.5.n5.vec) and select the model by accuracy.
# fitControl (resampling setup) and nn1Grid (tuning grid) are defined outside
# this section.
# NOTE(review): `Importance` is not a documented argument of caret::train or
# nnet::nnet; it is forwarded through `...` and presumably ignored - confirm
# before relying on it.
Adult_TDA_PC_5.60.5_n5_NN1Fit0 <- train(
  as.factor(adult_df1) ~ .,
  data = tda.m_adult_5.60.5.n5.vec,
  Importance = TRUE, # spelled out: `T` is an ordinary, reassignable variable
  method = "nnet",
  trControl = fitControl,
  tuneGrid = nn1Grid,
  metric = "Accuracy"
)
## # weights: 221
## initial value 5744.319145
## iter 10 value 529.818820
## iter 20 value 510.499806
## iter 30 value 469.733949
## iter 40 value 466.230785
## iter 50 value 466.078871
## iter 60 value 465.978826
## iter 70 value 465.963818
## final value 465.963073
## converged
## # weights: 331
## initial value 5446.402384
## iter 10 value 512.538285
## iter 20 value 509.511745
## iter 30 value 509.508676
## final value 509.508612
## converged
## # weights: 551
## initial value 4789.671418
## iter 10 value 514.692218
## iter 20 value 465.329815
## iter 30 value 463.718995
## iter 40 value 463.681077
## iter 50 value 463.588836
## final value 463.588792
## converged
## # weights: 771
## initial value 9727.499859
## iter 10 value 624.092746
## iter 20 value 474.510850
## iter 30 value 465.532539
## iter 40 value 456.566003
## iter 50 value 451.021045
## iter 60 value 449.962348
## iter 70 value 447.862393
## iter 80 value 447.747284
## iter 90 value 447.581743
## iter 100 value 447.113692
## final value 447.113692
## stopped after 100 iterations
## # weights: 221
## initial value 12546.602891
## iter 10 value 528.682879
## iter 20 value 517.466258
## iter 30 value 513.146511
## iter 40 value 495.850633
## iter 50 value 470.665466
## iter 60 value 469.527518
## iter 70 value 469.436900
## iter 80 value 468.826019
## iter 90 value 460.216695
## iter 100 value 453.059896
## final value 453.059896
## stopped after 100 iterations
## # weights: 331
## initial value 4568.715821
## iter 10 value 517.688694
## iter 20 value 511.885457
## iter 30 value 500.469364
## iter 40 value 499.921136
## iter 50 value 477.866822
## iter 60 value 475.126794
## iter 70 value 463.036067
## iter 80 value 460.677976
## iter 90 value 459.306339
## iter 100 value 459.206155
## final value 459.206155
## stopped after 100 iterations
## # weights: 551
## initial value 9728.463299
## iter 10 value 527.486788
## iter 20 value 512.842463
## iter 30 value 511.818931
## iter 40 value 511.482939
## iter 50 value 490.865844
## iter 60 value 476.388272
## iter 70 value 474.534711
## iter 80 value 472.193579
## iter 90 value 471.898340
## iter 100 value 471.897463
## final value 471.897463
## stopped after 100 iterations
## # weights: 771
## initial value 4833.386935
## iter 10 value 510.670986
## iter 20 value 510.085940
## iter 30 value 509.835623
## iter 40 value 486.201501
## iter 50 value 464.890123
## iter 60 value 463.378948
## iter 70 value 462.344129
## iter 80 value 460.921404
## iter 90 value 459.990182
## iter 100 value 456.672611
## final value 456.672611
## stopped after 100 iterations
## # weights: 221
## initial value 8236.221618
## iter 10 value 564.735986
## iter 20 value 556.639252
## iter 30 value 526.113151
## iter 40 value 501.567774
## iter 50 value 483.992562
## iter 60 value 480.212485
## iter 70 value 477.996847
## iter 80 value 477.539900
## iter 90 value 475.653586
## iter 100 value 467.254597
## final value 467.254597
## stopped after 100 iterations
## # weights: 331
## initial value 5769.826818
## iter 10 value 518.481365
## iter 20 value 512.466001
## iter 30 value 512.404822
## iter 40 value 485.976366
## iter 50 value 468.085955
## iter 60 value 465.700517
## iter 70 value 464.140859
## iter 80 value 461.571077
## iter 90 value 428.981117
## iter 100 value 408.692582
## final value 408.692582
## stopped after 100 iterations
## # weights: 551
## initial value 8711.121441
## iter 10 value 543.473074
## iter 20 value 513.706485
## iter 30 value 513.016507
## iter 40 value 511.106873
## iter 50 value 510.605340
## iter 60 value 479.964587
## iter 70 value 472.936586
## iter 80 value 472.426800
## iter 90 value 472.423073
## final value 472.423062
## converged
## # weights: 771
## initial value 4900.994750
## iter 10 value 551.882979
## iter 20 value 504.207314
## iter 30 value 501.719517
## iter 40 value 501.500664
## iter 50 value 476.920411
## iter 60 value 467.355233
## iter 70 value 459.482623
## iter 80 value 434.398228
## iter 90 value 409.534361
## iter 100 value 407.631610
## final value 407.631610
## stopped after 100 iterations
## # weights: 221
## initial value 6657.709644
## iter 10 value 512.529786
## iter 20 value 498.991152
## iter 30 value 463.713605
## iter 40 value 457.987267
## iter 50 value 456.634079
## iter 60 value 454.072880
## iter 70 value 451.917684
## iter 80 value 437.771128
## iter 90 value 436.433228
## iter 100 value 436.030981
## final value 436.030981
## stopped after 100 iterations
## # weights: 331
## initial value 8564.650082
## iter 10 value 513.690398
## iter 20 value 512.244659
## iter 30 value 460.932181
## iter 40 value 459.216797
## iter 50 value 457.358620
## iter 60 value 457.176808
## iter 70 value 456.493614
## final value 456.492582
## converged
## # weights: 551
## initial value 6988.321102
## iter 10 value 508.353802
## iter 20 value 506.200610
## iter 30 value 485.825824
## iter 40 value 466.495919
## iter 50 value 453.136293
## iter 60 value 452.268900
## iter 70 value 449.438731
## iter 80 value 444.344866
## iter 90 value 436.986419
## iter 100 value 433.449859
## final value 433.449859
## stopped after 100 iterations
## # weights: 771
## initial value 12957.124366
## iter 10 value 554.346890
## iter 20 value 471.265413
## iter 30 value 460.080303
## iter 40 value 457.724202
## iter 50 value 454.600215
## iter 60 value 445.195934
## iter 70 value 429.773230
## iter 80 value 421.737512
## iter 90 value 414.999209
## iter 100 value 406.581763
## final value 406.581763
## stopped after 100 iterations
## # weights: 221
## initial value 8519.544887
## iter 10 value 529.079982
## iter 20 value 465.261124
## iter 30 value 459.497913
## iter 40 value 451.944860
## iter 50 value 448.090366
## iter 60 value 444.752232
## iter 70 value 441.347057
## iter 80 value 440.802791
## iter 90 value 438.719419
## iter 100 value 432.522046
## final value 432.522046
## stopped after 100 iterations
## # weights: 331
## initial value 5685.335268
## iter 10 value 508.725814
## iter 20 value 507.151162
## iter 30 value 477.534391
## iter 40 value 465.922383
## iter 50 value 461.420369
## iter 60 value 461.268208
## iter 70 value 461.253914
## iter 80 value 458.118444
## iter 90 value 448.023231
## iter 100 value 447.235003
## final value 447.235003
## stopped after 100 iterations
## # weights: 551
## initial value 13774.946913
## iter 10 value 509.166427
## iter 20 value 481.758503
## iter 30 value 467.808807
## iter 40 value 446.474395
## iter 50 value 445.044142
## iter 60 value 444.253630
## iter 70 value 441.508604
## iter 80 value 440.471817
## iter 90 value 439.643231
## iter 100 value 439.528749
## final value 439.528749
## stopped after 100 iterations
## # weights: 771
## initial value 19235.290237
## iter 10 value 517.241428
## iter 20 value 506.261843
## iter 30 value 492.547737
## iter 40 value 465.143816
## iter 50 value 462.960671
## iter 60 value 462.005302
## iter 70 value 458.343491
## iter 80 value 449.892479
## iter 90 value 435.254656
## iter 100 value 423.249798
## final value 423.249798
## stopped after 100 iterations
## # weights: 221
## initial value 7469.413957
## iter 10 value 514.319064
## iter 20 value 510.278415
## iter 30 value 504.194094
## iter 40 value 487.120834
## iter 50 value 472.757394
## iter 60 value 465.497980
## iter 70 value 453.949706
## iter 80 value 452.679734
## iter 90 value 452.638890
## iter 100 value 452.600877
## final value 452.600877
## stopped after 100 iterations
## # weights: 331
## initial value 11114.887678
## iter 10 value 538.810521
## iter 20 value 530.294489
## iter 30 value 473.869910
## iter 40 value 433.605750
## iter 50 value 394.665715
## iter 60 value 384.265163
## iter 70 value 383.223445
## iter 80 value 383.125525
## iter 90 value 382.984377
## iter 100 value 382.976870
## final value 382.976870
## stopped after 100 iterations
## # weights: 551
## initial value 4887.726055
## iter 10 value 555.239197
## iter 20 value 492.355948
## iter 30 value 457.569491
## iter 40 value 446.150375
## iter 50 value 438.965633
## iter 60 value 424.560087
## iter 70 value 416.121554
## iter 80 value 415.427769
## iter 90 value 403.779804
## iter 100 value 395.538274
## final value 395.538274
## stopped after 100 iterations
## # weights: 771
## initial value 6769.310794
## iter 10 value 770.992757
## iter 20 value 508.084111
## iter 30 value 495.589740
## iter 40 value 476.178686
## iter 50 value 469.189048
## iter 60 value 464.460221
## iter 70 value 455.699350
## iter 80 value 447.920111
## iter 90 value 445.223690
## iter 100 value 444.881900
## final value 444.881900
## stopped after 100 iterations
## # weights: 221
## initial value 16326.122167
## iter 10 value 635.278690
## iter 20 value 514.715316
## iter 30 value 504.441910
## iter 40 value 489.223384
## iter 50 value 478.296733
## iter 60 value 474.820788
## iter 70 value 472.758898
## iter 80 value 471.248799
## iter 90 value 470.951009
## iter 100 value 467.711320
## final value 467.711320
## stopped after 100 iterations
## # weights: 331
## initial value 9022.382860
## iter 10 value 515.020284
## iter 20 value 494.836856
## iter 30 value 445.759777
## iter 40 value 393.672025
## iter 50 value 386.199551
## final value 385.698846
## converged
## # weights: 551
## initial value 14509.334721
## iter 10 value 849.163485
## iter 20 value 513.140193
## iter 30 value 510.369675
## iter 40 value 510.270878
## iter 50 value 509.137037
## iter 60 value 503.876267
## iter 70 value 454.831760
## iter 80 value 425.115300
## iter 90 value 417.409083
## iter 100 value 410.733317
## final value 410.733317
## stopped after 100 iterations
## # weights: 771
## initial value 11281.506291
## iter 10 value 697.647766
## iter 20 value 509.367401
## iter 30 value 480.146169
## iter 40 value 468.646012
## iter 50 value 439.868736
## iter 60 value 438.492237
## iter 70 value 437.049750
## iter 80 value 409.503280
## iter 90 value 390.223824
## iter 100 value 384.410660
## final value 384.410660
## stopped after 100 iterations
## # weights: 221
## initial value 6384.261533
## iter 10 value 513.571845
## iter 20 value 513.568031
## iter 30 value 476.441502
## iter 40 value 471.598170
## iter 50 value 458.590627
## iter 60 value 457.081374
## iter 70 value 456.967952
## iter 80 value 456.657810
## iter 90 value 456.054236
## iter 100 value 455.823674
## final value 455.823674
## stopped after 100 iterations
## # weights: 331
## initial value 5425.588634
## iter 10 value 525.363341
## iter 20 value 519.428935
## iter 30 value 485.255351
## iter 40 value 476.747230
## iter 50 value 464.542410
## iter 60 value 461.960984
## iter 70 value 461.007174
## iter 80 value 456.503204
## iter 90 value 453.699200
## iter 100 value 451.205291
## final value 451.205291
## stopped after 100 iterations
## # weights: 551
## initial value 8982.507571
## iter 10 value 491.790085
## iter 20 value 468.173252
## iter 30 value 466.227226
## iter 40 value 466.159977
## iter 50 value 463.754555
## iter 60 value 462.106878
## iter 70 value 461.435056
## iter 80 value 461.382917
## iter 90 value 461.361078
## iter 100 value 461.102746
## final value 461.102746
## stopped after 100 iterations
## # weights: 771
## initial value 8910.233964
## iter 10 value 517.644002
## iter 20 value 474.877542
## iter 30 value 471.164247
## iter 40 value 459.982799
## iter 50 value 454.325169
## iter 60 value 448.410252
## iter 70 value 445.455713
## iter 80 value 445.294998
## iter 90 value 445.233850
## iter 100 value 444.772027
## final value 444.772027
## stopped after 100 iterations
## # weights: 221
## initial value 10904.936002
## iter 10 value 540.880793
## iter 20 value 513.178322
## iter 30 value 472.736623
## iter 40 value 470.711787
## iter 50 value 470.348302
## iter 60 value 469.326253
## iter 70 value 468.484118
## iter 80 value 465.988809
## iter 90 value 449.339196
## iter 100 value 446.457557
## final value 446.457557
## stopped after 100 iterations
## # weights: 331
## initial value 8805.300389
## iter 10 value 539.971702
## iter 20 value 506.531270
## iter 30 value 480.788798
## iter 40 value 465.013440
## iter 50 value 457.788708
## iter 60 value 457.641176
## iter 70 value 457.438871
## iter 80 value 456.683416
## iter 90 value 451.680347
## iter 100 value 447.896511
## final value 447.896511
## stopped after 100 iterations
## # weights: 551
## initial value 16741.876046
## iter 10 value 662.369923
## iter 20 value 512.576860
## iter 30 value 511.838965
## iter 40 value 480.001930
## iter 50 value 469.892945
## iter 60 value 464.530940
## iter 70 value 459.221985
## iter 80 value 458.711448
## iter 90 value 457.370387
## iter 100 value 452.080807
## final value 452.080807
## stopped after 100 iterations
## # weights: 771
## initial value 14710.520363
## iter 10 value 519.102588
## iter 20 value 488.600552
## iter 30 value 464.092053
## iter 40 value 461.703874
## iter 50 value 458.300179
## iter 60 value 453.885813
## iter 70 value 452.148766
## iter 80 value 448.499457
## iter 90 value 447.506977
## iter 100 value 443.215628
## final value 443.215628
## stopped after 100 iterations
## # weights: 331
## initial value 8081.991600
## iter 10 value 719.850195
## iter 20 value 713.105264
## iter 30 value 694.041719
## iter 40 value 687.118684
## iter 50 value 660.927249
## iter 60 value 626.971663
## iter 70 value 603.953913
## iter 80 value 583.131858
## iter 90 value 578.753232
## iter 100 value 578.715980
## final value 578.715980
## stopped after 100 iterations
# Print the tuning summary of the node-5 fit (resampled accuracy and kappa for
# each size/decay combination, plus the selected values).
Adult_TDA_PC_5.60.5_n5_NN1Fit0
## Neural Network
##
## 16508 samples
## 108 predictor
## 2 classes: ' <=50K', ' >50K'
##
## No pre-processing
## Resampling: Cross-Validated (3 fold)
## Summary of sample sizes: 11006, 11004, 11006
## Resampling results across tuning parameters:
##
## size decay Accuracy Kappa
## 2 0.3 0.9924884 0.0980688588
## 2 0.5 0.9929125 0.2418254442
## 2 0.7 0.9924280 0.0825568948
## 3 0.3 0.9920645 -0.0001184563
## 3 0.5 0.9927309 0.1758420031
## 3 0.7 0.9930337 0.2636034862
## 5 0.3 0.9924884 0.0980688588
## 5 0.5 0.9924884 0.0980688588
## 5 0.7 0.9923673 0.0944195576
## 7 0.3 0.9925491 0.1494826371
## 7 0.5 0.9926703 0.1655346275
## 7 0.7 0.9921250 0.0000000000
##
## Accuracy was used to select the optimal model using the largest value.
## The final values used for the model were size = 3 and decay = 0.7.
# Fold-level resampling results (accuracy, kappa) of the selected node-5 model.
Adult_TDA_PC_5.60.5_n5_NN1Fit0[["resample"]]
## Accuracy Kappa Resample
## 1 0.9929117 0.2025554 Fold3
## 2 0.9930959 0.2942066 Fold2
## 3 0.9930934 0.2940485 Fold1
# Keep only the Accuracy column (still a one-column data frame) for the
# fold-wise comparison further down.
# NOTE(review): rows stay in the order printed above (Fold3, Fold2, Fold1), so
# any row-wise pairing with another model assumes that model's folds are listed
# in the same order - confirm.
ad_tda_pc_5.60.5_n5_nn1_fit_re <-
  Adult_TDA_PC_5.60.5_n5_NN1Fit0[["resample"]]["Accuracy"]
# Print the architecture and weights of the final fitted network.
summary(Adult_TDA_PC_5.60.5_n5_NN1Fit0)
## a 108-3-1 network with 331 weights
## options were - entropy fitting decay=0.7
## b->h1 i1->h1 i2->h1 i3->h1 i4->h1 i5->h1 i6->h1 i7->h1
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i8->h1 i9->h1 i10->h1 i11->h1 i12->h1 i13->h1 i14->h1 i15->h1
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i16->h1 i17->h1 i18->h1 i19->h1 i20->h1 i21->h1 i22->h1 i23->h1
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i24->h1 i25->h1 i26->h1 i27->h1 i28->h1 i29->h1 i30->h1 i31->h1
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i32->h1 i33->h1 i34->h1 i35->h1 i36->h1 i37->h1 i38->h1 i39->h1
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i40->h1 i41->h1 i42->h1 i43->h1 i44->h1 i45->h1 i46->h1 i47->h1
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i48->h1 i49->h1 i50->h1 i51->h1 i52->h1 i53->h1 i54->h1 i55->h1
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i56->h1 i57->h1 i58->h1 i59->h1 i60->h1 i61->h1 i62->h1 i63->h1
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i64->h1 i65->h1 i66->h1 i67->h1 i68->h1 i69->h1 i70->h1 i71->h1
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i72->h1 i73->h1 i74->h1 i75->h1 i76->h1 i77->h1 i78->h1 i79->h1
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i80->h1 i81->h1 i82->h1 i83->h1 i84->h1 i85->h1 i86->h1 i87->h1
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i88->h1 i89->h1 i90->h1 i91->h1 i92->h1 i93->h1 i94->h1 i95->h1
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i96->h1 i97->h1 i98->h1 i99->h1 i100->h1 i101->h1 i102->h1 i103->h1
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i104->h1 i105->h1 i106->h1 i107->h1 i108->h1
## 0.00 0.00 0.00 0.00 0.00
## b->h2 i1->h2 i2->h2 i3->h2 i4->h2 i5->h2 i6->h2 i7->h2
## 1.34 -0.03 0.12 -0.21 0.04 0.01 -0.29 0.49
## i8->h2 i9->h2 i10->h2 i11->h2 i12->h2 i13->h2 i14->h2 i15->h2
## 0.92 0.23 0.03 0.00 0.91 -0.30 0.06 0.41
## i16->h2 i17->h2 i18->h2 i19->h2 i20->h2 i21->h2 i22->h2 i23->h2
## -0.23 0.77 0.32 0.08 -0.13 -0.45 0.05 0.05
## i24->h2 i25->h2 i26->h2 i27->h2 i28->h2 i29->h2 i30->h2 i31->h2
## -0.13 0.18 0.14 -0.41 0.06 0.08 -1.20 0.45
## i32->h2 i33->h2 i34->h2 i35->h2 i36->h2 i37->h2 i38->h2 i39->h2
## 0.58 0.38 0.77 0.28 0.13 0.07 0.00 -0.26
## i40->h2 i41->h2 i42->h2 i43->h2 i44->h2 i45->h2 i46->h2 i47->h2
## -0.29 0.39 0.72 0.61 0.00 0.73 0.43 -1.12
## i48->h2 i49->h2 i50->h2 i51->h2 i52->h2 i53->h2 i54->h2 i55->h2
## -0.18 -0.37 0.50 0.02 0.09 0.69 0.47 0.20
## i56->h2 i57->h2 i58->h2 i59->h2 i60->h2 i61->h2 i62->h2 i63->h2
## -0.13 0.45 0.02 0.33 -0.09 0.64 -0.52 1.86
## i64->h2 i65->h2 i66->h2 i67->h2 i68->h2 i69->h2 i70->h2 i71->h2
## 0.00 0.00 -0.03 0.64 0.06 0.28 0.16 0.19
## i72->h2 i73->h2 i74->h2 i75->h2 i76->h2 i77->h2 i78->h2 i79->h2
## 0.33 0.27 0.07 0.24 -0.31 0.09 0.40 0.03
## i80->h2 i81->h2 i82->h2 i83->h2 i84->h2 i85->h2 i86->h2 i87->h2
## 0.01 0.19 0.00 0.04 0.06 0.05 -0.39 0.05
## i88->h2 i89->h2 i90->h2 i91->h2 i92->h2 i93->h2 i94->h2 i95->h2
## -0.44 0.20 -0.23 -0.61 -0.51 0.19 0.10 0.12
## i96->h2 i97->h2 i98->h2 i99->h2 i100->h2 i101->h2 i102->h2 i103->h2
## 0.11 -0.36 0.14 -0.78 0.41 0.04 0.20 0.11
## i104->h2 i105->h2 i106->h2 i107->h2 i108->h2
## 0.12 0.13 0.24 -0.31 0.01
## b->h3 i1->h3 i2->h3 i3->h3 i4->h3 i5->h3 i6->h3 i7->h3
## 0.05 0.45 0.00 0.00 0.00 0.00 0.04 0.00
## i8->h3 i9->h3 i10->h3 i11->h3 i12->h3 i13->h3 i14->h3 i15->h3
## 0.00 0.00 0.00 0.00 0.00 0.00 0.01 0.00
## i16->h3 i17->h3 i18->h3 i19->h3 i20->h3 i21->h3 i22->h3 i23->h3
## 0.00 0.00 0.00 -0.10 0.00 0.02 0.00 -0.07
## i24->h3 i25->h3 i26->h3 i27->h3 i28->h3 i29->h3 i30->h3 i31->h3
## 0.00 0.00 0.00 0.20 0.42 0.20 0.00 0.01
## i32->h3 i33->h3 i34->h3 i35->h3 i36->h3 i37->h3 i38->h3 i39->h3
## 0.00 -0.19 0.04 0.00 0.00 -0.03 0.00 0.01
## i40->h3 i41->h3 i42->h3 i43->h3 i44->h3 i45->h3 i46->h3 i47->h3
## 0.02 0.00 0.04 0.00 0.01 0.00 -0.02 0.00
## i48->h3 i49->h3 i50->h3 i51->h3 i52->h3 i53->h3 i54->h3 i55->h3
## 0.02 0.00 0.01 0.00 0.12 0.00 0.02 -0.09
## i56->h3 i57->h3 i58->h3 i59->h3 i60->h3 i61->h3 i62->h3 i63->h3
## 0.00 0.02 -0.07 -0.01 0.00 0.11 0.02 0.03
## i64->h3 i65->h3 i66->h3 i67->h3 i68->h3 i69->h3 i70->h3 i71->h3
## 0.00 0.05 0.13 0.00 0.00 0.00 0.00 0.00
## i72->h3 i73->h3 i74->h3 i75->h3 i76->h3 i77->h3 i78->h3 i79->h3
## 0.00 0.00 0.00 0.00 0.02 0.00 0.00 0.00
## i80->h3 i81->h3 i82->h3 i83->h3 i84->h3 i85->h3 i86->h3 i87->h3
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i88->h3 i89->h3 i90->h3 i91->h3 i92->h3 i93->h3 i94->h3 i95->h3
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i96->h3 i97->h3 i98->h3 i99->h3 i100->h3 i101->h3 i102->h3 i103->h3
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i104->h3 i105->h3 i106->h3 i107->h3 i108->h3
## 0.00 0.00 0.03 0.00 0.00
## b->o h1->o h2->o h3->o
## 0.54 0.28 -5.55 -2.63
# Variable-importance plot limited to 25 features for the node-5 network.
vip(Adult_TDA_PC_5.60.5_n5_NN1Fit0, num_features = 25) +
  ggtitle("Adult_TDA_PCA_5.60.5_n5_NN1Fit TDA-Assisted NN")

# Predict the outcome for the held-out test data with the node-5 model.
pred0 <- predict(Adult_TDA_PC_5.60.5_n5_NN1Fit0, newdata = adult.one_hot_df4Test)
# Cross-tabulate predictions against the observed test labels.
# No `positive` class is passed, so the per-class statistics refer to the
# class reported as 'Positive' in the printed output (<=50K).
ad_tda_pc_5.60.5_n5_nn1_cf0 <- confusionMatrix(
  data = pred0,
  reference = as.factor(adult.one_hot_df4Test$adult_df1)
)
ad_tda_pc_5.60.5_n5_nn1_cf0
## Confusion Matrix and Statistics
##
## Reference
## Prediction <=50K >50K
## <=50K 7414 2157
## >50K 2 195
##
## Accuracy : 0.779
## 95% CI : (0.7706, 0.7872)
## No Information Rate : 0.7592
## P-Value [Acc > NIR] : 2.137e-06
##
## Kappa : 0.1203
##
## Mcnemar's Test P-Value : < 2.2e-16
##
## Sensitivity : 0.99973
## Specificity : 0.08291
## Pos Pred Value : 0.77463
## Neg Pred Value : 0.98985
## Prevalence : 0.75921
## Detection Rate : 0.75901
## Detection Prevalence : 0.97983
## Balanced Accuracy : 0.54132
##
## 'Positive' Class : <=50K
##
# Repeat print of the node-5 confusion matrix object created above; nothing is
# recomputed here.
ad_tda_pc_5.60.5_n5_nn1_cf0
## Confusion Matrix and Statistics
##
## Reference
## Prediction <=50K >50K
## <=50K 7414 2157
## >50K 2 195
##
## Accuracy : 0.779
## 95% CI : (0.7706, 0.7872)
## No Information Rate : 0.7592
## P-Value [Acc > NIR] : 2.137e-06
##
## Kappa : 0.1203
##
## Mcnemar's Test P-Value : < 2.2e-16
##
## Sensitivity : 0.99973
## Specificity : 0.08291
## Pos Pred Value : 0.77463
## Neg Pred Value : 0.98985
## Prevalence : 0.75921
## Detection Rate : 0.75901
## Detection Prevalence : 0.97983
## Balanced Accuracy : 0.54132
##
## 'Positive' Class : <=50K
##
# Overall test-set statistics (accuracy, kappa, accuracy bounds, p-values).
ad_tda_pc_5.60.5_n5_nn1_cf0[["overall"]]
## Accuracy Kappa AccuracyLower AccuracyUpper AccuracyNull
## 7.789722e-01 1.202586e-01 7.706098e-01 7.871679e-01 7.592138e-01
## AccuracyPValue McnemarPValue
## 2.136509e-06 0.000000e+00
# Store the test accuracy as a named scalar for the test-set comparison below.
ad_tda_pc_5.60.5_n5_nn1_cf0_ov_acc <-
  ad_tda_pc_5.60.5_n5_nn1_cf0[["overall"]]["Accuracy"]
# Per-class statistics of the confusion matrix.
ad_tda_pc_5.60.5_n5_nn1_cf0[["byClass"]]
## Sensitivity Specificity Pos Pred Value
## 0.99973031 0.08290816 0.77463170
## Neg Pred Value Precision Recall
## 0.98984772 0.77463170 0.99973031
## F1 Prevalence Detection Rate
## 0.87290281 0.75921376 0.75900901
## Detection Prevalence Balanced Accuracy
## 0.97983210 0.54131924
# Keep precision, recall and F1 from the per-class statistics.
ad_tda_pc_5.60.5_n5_nn1_cf0_pre_rec_f1 <-
  ad_tda_pc_5.60.5_n5_nn1_cf0$byClass[c("Precision", "Recall", "F1")]
###### Bayesian comparison of the NN classifier vs. the TDA-assisted NN classifier (node 5)
### 3-fold diff
# Row-wise difference of fold accuracies: ad_nn1_fit_re minus the node-5 TDA
# model, so negative values mean the TDA-assisted fit scored higher.
# ad_nn1_fit_re is defined outside this section - presumably the fold accuracies
# of the NN trained without TDA; confirm.
diff_tda_pca_5.60.5_nn1_n5_3_fold <- ad_nn1_fit_re - ad_tda_pc_5.60.5_n5_nn1_fit_re
diff_tda_pca_5.60.5_nn1_n5_3_fold
## Accuracy
## 1 -0.1849612
## 2 -0.1430038
## 3 -0.1916983
## Bayesian tests on the 3-fold differences
# Bayesian sign test. -0.01 and 0.01 are presumably the lower and upper ROPE
# limits - confirm against the BayesianSignTest definition.
bst_tda_pca_5.60.5_nn1.n5_3_fold <- BayesianSignTest(
  as.matrix(diff_tda_pca_5.60.5_nn1_n5_3_fold),
  -0.01,
  0.01
)
bst_tda_pca_5.60.5_nn1.n5_3_fold
## $probLeft
## [1] 0.75
##
## $probRope
## [1] 0.25
##
## $probRight
## [1] 0
# Odds left from the sign test: probLeft / probRight.
# The ratio is Inf when probRight is 0, as in the result printed below.
bst_tda_pca_5.60.5_nn1.n5_3_fold_odds.left <- with(
  bst_tda_pca_5.60.5_nn1.n5_3_fold,
  probLeft / probRight
)
bst_tda_pca_5.60.5_nn1.n5_3_fold_odds.left
## [1] Inf
# Bayesian signed-rank test on the same fold differences, with the same two
# bounds as the sign test.
bsr_tda_pca_5.60.5_nn1.n5_3_fold <- BayesianSignedRank(
  as.matrix(diff_tda_pca_5.60.5_nn1_n5_3_fold),
  -0.01,
  0.01
)
bsr_tda_pca_5.60.5_nn1.n5_3_fold
## $winLeft
## [1] 0.9904667
##
## $winRope
## [1] 0.009533333
##
## $winRight
## [1] 0
# Bayesian correlated t-test on the fold differences.
# NOTE(review): 0.1 is presumably the assumed correlation between folds and
# -0.01 / 0.01 the ROPE limits - confirm against correlatedBayesianTtest.
bct_tda_pca_5.60.5_nn1.n5_3_fold <- correlatedBayesianTtest(
  as.matrix(diff_tda_pca_5.60.5_nn1_n5_3_fold),
  0.1,
  -0.01,
  0.01
)
bct_tda_pca_5.60.5_nn1.n5_3_fold
## $left
## [1] 0.9942923
##
## $rope
## [1] 0.001162052
##
## $right
## [1] 0.00454561
# Rope Plot
plot(rope(diff_tda_pca_5.60.5_nn1_n5_3_fold,c(-0.01,0.01)))

#BayesFactor
#bf_tda_pca_5.60.5_nn1.n5_3_fold = ttestBF(x = as.matrix(diff_tda_pca_5.60.5_nn1_n5_3_fold))
#bf_tda_pca_5.60.5_nn1.n5_3_fold
# One-sample t-test of the per-fold differences (default null: mean equal to 0).
t.test(
  as.matrix(diff_tda_pca_5.60.5_nn1_n5_3_fold)
)
##
## One Sample t-test
##
## data: as.matrix(diff_tda_pca_5.60.5_nn1_n5_3_fold)
## t = -11.371, df = 2, p-value = 0.007645
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
## -0.2387647 -0.1076775
## sample estimates:
## mean of x
## -0.1732211
### Test set diff
# Single test-set accuracy difference: nn1_cf_ov_acc minus the TDA PCA node-5
# accuracy stored above. A positive value means the first accuracy is higher.
diff_tda_pca_5.60.5_nn1.n5_test <-
  nn1_cf_ov_acc - ad_tda_pc_5.60.5_n5_nn1_cf0_ov_acc
diff_tda_pca_5.60.5_nn1.n5_test
## Accuracy
## 0.02610565
## Bayesian Tests Test set diff
# Bayesian Sign Test on the single test-set difference, bounds -0.01 / 0.01.
bst_tda_pca_5.60.5_nn1.n5_test <- BayesianSignTest(
  as.matrix(diff_tda_pca_5.60.5_nn1.n5_test),
  -0.01,
  0.01
)
bst_tda_pca_5.60.5_nn1.n5_test
## $probLeft
## [1] 0
##
## $probRope
## [1] 0.5
##
## $probRight
## [1] 0.5
# Odds Left Bayesian Sign Test: probLeft divided by probRight.
bst_tda_pca_5.60.5_nn1.n5_test_odds.left <-
  bst_tda_pca_5.60.5_nn1.n5_test[["probLeft"]] /
  bst_tda_pca_5.60.5_nn1.n5_test[["probRight"]]
bst_tda_pca_5.60.5_nn1.n5_test_odds.left
## [1] 0
# Bayesian Signed Rank Test on the single test-set difference, bounds -0.01 / 0.01.
bsr_tda_pca_5.60.5_nn1.n5_test <- BayesianSignedRank(
  as.matrix(diff_tda_pca_5.60.5_nn1.n5_test),
  -0.01,
  0.01
)
bsr_tda_pca_5.60.5_nn1.n5_test
## $winLeft
## [1] 0
##
## $winRope
## [1] 0.1602667
##
## $winRight
## [1] 0.8397333
# Bayesian Correlated Test on the single test-set difference.
# With only one difference supplied, all three entries print as NA below.
bct_tda_pca_5.60.5_nn1.n5_test <- correlatedBayesianTtest(
  as.matrix(diff_tda_pca_5.60.5_nn1.n5_test),
  0.1,
  -0.01,
  0.01
)
bct_tda_pca_5.60.5_nn1.n5_test
## $left
## [1] NA
##
## $rope
## [1] NA
##
## $right
## [1] NA
# Rope Plot
#plot(rope(diff_tda_pca_5.60.5_nn1.n5_test)))
#BayesFactor
#bf_tda_pca_5.60.5_nn1.n5_test = ttestBF(x = as.matrix(diff_tda_pca_5.60.5_nn1.n5_test)) #bf_tda_pca_5.60.5_nn1.n5_test
#t_test
#t.test(as.matrix(diff_tda_pca_5.60.5_nn1.n5_test))
##With TDA KDE filter 5 intervals, 60% overlap, 5 bins
##Node1
#Neural Network 1
# Fit a neural network (caret method "nnet") to the node-1 TDA KDE data,
# tuning over nn1Grid under the resampling scheme in fitControl and selecting
# the final model by accuracy.
# NOTE(review): `Importance` is forwarded to the underlying fitting function;
# it does not appear to be a documented nnet argument and may be ignored - confirm.
Adult_TDA_KDE_5.60.5_n1_NN1Fit0 <- train(
  as.factor(adult_df1) ~ .,
  data = tda.m_kde_adult_5.60.5.n1.vec,
  Importance = TRUE, # spelled out: `T` is an ordinary variable and can be reassigned
  method = "nnet",
  trControl = fitControl,
  tuneGrid = nn1Grid,
  metric = "Accuracy"
)
## # weights: 221
## initial value 8465.559983
## iter 10 value 5829.002301
## iter 20 value 5319.297405
## iter 30 value 5299.355528
## iter 40 value 5295.398757
## iter 50 value 5181.776784
## iter 60 value 5170.895711
## iter 70 value 5151.295770
## iter 80 value 5136.449119
## iter 90 value 5111.035887
## iter 100 value 5089.800014
## final value 5089.800014
## stopped after 100 iterations
## # weights: 331
## initial value 6214.957437
## iter 10 value 5689.948347
## iter 20 value 5258.030285
## iter 30 value 5211.248445
## iter 40 value 5187.593001
## iter 50 value 5173.187762
## iter 60 value 5147.515584
## final value 5139.704010
## converged
## # weights: 551
## initial value 5947.428872
## iter 10 value 5355.604003
## iter 20 value 5312.712076
## iter 30 value 5285.882495
## iter 40 value 5267.397130
## iter 50 value 5259.116009
## iter 60 value 5208.107280
## iter 70 value 5094.149356
## iter 80 value 5032.122756
## iter 90 value 5012.069983
## iter 100 value 4990.421265
## final value 4990.421265
## stopped after 100 iterations
## # weights: 771
## initial value 16651.045269
## iter 10 value 5745.875122
## iter 20 value 5368.905534
## iter 30 value 5321.965699
## iter 40 value 5230.397376
## iter 50 value 5164.039072
## iter 60 value 5032.891004
## iter 70 value 4710.014887
## iter 80 value 4454.066398
## iter 90 value 4141.225673
## iter 100 value 3742.723359
## final value 3742.723359
## stopped after 100 iterations
## # weights: 221
## initial value 5859.175865
## iter 10 value 5465.817495
## iter 20 value 5377.056644
## iter 30 value 5359.127853
## iter 40 value 5282.128166
## iter 50 value 5208.545629
## iter 60 value 5173.000458
## iter 70 value 5136.866066
## iter 80 value 5000.719077
## iter 90 value 4454.580725
## iter 100 value 4142.264025
## final value 4142.264025
## stopped after 100 iterations
## # weights: 331
## initial value 7539.309266
## iter 10 value 5396.202195
## iter 20 value 5312.886622
## iter 30 value 5295.053578
## iter 40 value 5275.743780
## iter 50 value 5230.714335
## iter 60 value 5122.769181
## iter 70 value 4921.770492
## iter 80 value 3967.168035
## iter 90 value 3458.297058
## iter 100 value 3385.331984
## final value 3385.331984
## stopped after 100 iterations
## # weights: 551
## initial value 8122.214841
## iter 10 value 5851.600916
## iter 20 value 5828.393775
## iter 30 value 5409.889867
## iter 40 value 5405.794620
## iter 50 value 5333.972305
## iter 60 value 5306.831200
## iter 70 value 5301.919769
## iter 80 value 5177.319839
## iter 90 value 5126.153685
## iter 100 value 4879.503329
## final value 4879.503329
## stopped after 100 iterations
## # weights: 771
## initial value 6351.236449
## iter 10 value 5593.298971
## iter 20 value 5330.497058
## iter 30 value 5294.346991
## iter 40 value 5194.513233
## iter 50 value 5102.311211
## iter 60 value 5064.063684
## iter 70 value 5042.833622
## iter 80 value 5029.238263
## iter 90 value 5023.262702
## iter 100 value 5001.453952
## final value 5001.453952
## stopped after 100 iterations
## # weights: 221
## initial value 6376.584910
## iter 10 value 5810.279643
## iter 20 value 5775.587965
## iter 30 value 5771.514624
## iter 40 value 5771.366477
## iter 50 value 5536.989033
## iter 60 value 5387.301783
## iter 70 value 5300.523812
## iter 80 value 5198.360285
## iter 90 value 5187.062093
## iter 100 value 5153.847750
## final value 5153.847750
## stopped after 100 iterations
## # weights: 331
## initial value 8033.482786
## iter 10 value 5785.841349
## iter 20 value 5349.059316
## iter 30 value 5309.636990
## iter 40 value 5299.000057
## iter 50 value 5284.752590
## iter 60 value 5189.489794
## iter 70 value 5105.963999
## iter 80 value 5037.046508
## iter 90 value 4979.711987
## iter 100 value 4921.753245
## final value 4921.753245
## stopped after 100 iterations
## # weights: 551
## initial value 6763.700353
## iter 10 value 5710.858596
## iter 20 value 5298.288384
## iter 30 value 5232.925610
## iter 40 value 5198.135999
## iter 50 value 5165.606388
## iter 60 value 5042.814264
## iter 70 value 5008.686998
## iter 80 value 4942.865133
## iter 90 value 4874.973119
## iter 100 value 4465.618447
## final value 4465.618447
## stopped after 100 iterations
## # weights: 771
## initial value 7210.017025
## iter 10 value 5483.131626
## iter 20 value 5245.878143
## iter 30 value 5184.126733
## iter 40 value 5159.279176
## iter 50 value 5131.830063
## iter 60 value 5067.936722
## iter 70 value 4996.867314
## iter 80 value 4826.777695
## iter 90 value 4300.679144
## iter 100 value 3849.874966
## final value 3849.874966
## stopped after 100 iterations
## # weights: 221
## initial value 7913.250856
## iter 10 value 5407.245011
## iter 20 value 5362.955726
## iter 30 value 5321.412614
## iter 40 value 5314.118088
## iter 50 value 5301.367529
## iter 60 value 5213.465527
## iter 70 value 5125.657642
## iter 80 value 4961.743279
## iter 90 value 4836.986301
## iter 100 value 4616.470175
## final value 4616.470175
## stopped after 100 iterations
## # weights: 331
## initial value 5845.500759
## iter 10 value 5720.087488
## iter 20 value 5712.836993
## iter 30 value 5712.799313
## iter 30 value 5712.799287
## iter 40 value 5323.056680
## iter 50 value 5302.826412
## iter 60 value 5272.806736
## iter 70 value 5200.130963
## iter 80 value 5165.669793
## iter 90 value 5052.274224
## iter 100 value 4902.870031
## final value 4902.870031
## stopped after 100 iterations
## # weights: 551
## initial value 6631.141780
## iter 10 value 5641.641203
## iter 20 value 5421.443229
## iter 30 value 5171.342610
## iter 40 value 5101.740330
## iter 50 value 5082.083842
## iter 60 value 5052.634424
## iter 70 value 5011.580325
## iter 80 value 4914.761727
## iter 90 value 4593.551084
## iter 100 value 4128.260952
## final value 4128.260952
## stopped after 100 iterations
## # weights: 771
## initial value 6077.353894
## iter 10 value 5565.578720
## iter 20 value 5520.167414
## iter 30 value 5333.192735
## iter 40 value 5330.710470
## iter 50 value 5329.535528
## iter 60 value 5317.961896
## iter 70 value 5196.119309
## iter 80 value 5159.010014
## iter 90 value 5143.013448
## iter 100 value 5138.037807
## final value 5138.037807
## stopped after 100 iterations
## # weights: 221
## initial value 7378.949145
## iter 10 value 5581.619065
## iter 20 value 5377.876496
## iter 30 value 5373.698473
## iter 40 value 5371.897869
## iter 50 value 5368.368677
## iter 60 value 5189.185265
## iter 70 value 5080.940308
## iter 80 value 5017.783341
## iter 90 value 4969.579813
## iter 100 value 4683.050505
## final value 4683.050505
## stopped after 100 iterations
## # weights: 331
## initial value 6941.792389
## iter 10 value 5826.281614
## iter 20 value 5670.922667
## iter 30 value 5386.431581
## iter 40 value 5326.847831
## iter 50 value 5224.151841
## iter 60 value 5148.944794
## iter 70 value 5120.950540
## iter 80 value 5010.635298
## iter 90 value 4958.840060
## iter 100 value 4769.318765
## final value 4769.318765
## stopped after 100 iterations
## # weights: 551
## initial value 6156.362291
## iter 10 value 5827.000790
## iter 20 value 5824.039923
## iter 30 value 5344.130053
## iter 40 value 5187.183619
## iter 50 value 5105.106377
## iter 60 value 5010.472002
## iter 70 value 4536.604954
## iter 80 value 3683.564289
## iter 90 value 3554.356659
## iter 100 value 3451.084757
## final value 3451.084757
## stopped after 100 iterations
## # weights: 771
## initial value 5939.768141
## iter 10 value 5403.656511
## iter 20 value 5341.897310
## iter 30 value 5327.879159
## iter 40 value 5318.546460
## iter 50 value 5310.170903
## iter 50 value 5310.170860
## iter 60 value 5303.911885
## iter 70 value 5292.448352
## iter 80 value 5255.731739
## iter 90 value 4986.878355
## iter 100 value 4925.022304
## final value 4925.022304
## stopped after 100 iterations
## # weights: 221
## initial value 8258.255589
## iter 10 value 5830.549283
## iter 20 value 5594.505010
## iter 30 value 5494.365984
## iter 40 value 5416.801063
## iter 50 value 5260.545854
## iter 60 value 5226.171879
## iter 70 value 5200.479706
## iter 80 value 5189.483636
## iter 90 value 5165.303682
## iter 100 value 5113.398499
## final value 5113.398499
## stopped after 100 iterations
## # weights: 331
## initial value 5954.211205
## iter 10 value 5735.372298
## iter 20 value 5379.369366
## iter 30 value 5209.627118
## iter 40 value 5088.987553
## iter 50 value 5067.861080
## iter 60 value 4857.912386
## iter 70 value 4657.365836
## iter 80 value 4340.906132
## iter 90 value 3940.330988
## iter 100 value 3777.015592
## final value 3777.015592
## stopped after 100 iterations
## # weights: 551
## initial value 5904.773617
## iter 10 value 5360.180271
## iter 20 value 5237.140957
## iter 30 value 5198.874399
## iter 40 value 5168.369076
## iter 50 value 5152.557600
## iter 60 value 5117.308701
## iter 70 value 5079.444046
## iter 80 value 5055.979210
## iter 90 value 5011.820062
## iter 100 value 4967.734737
## final value 4967.734737
## stopped after 100 iterations
## # weights: 771
## initial value 6811.759369
## iter 10 value 5615.923512
## iter 20 value 5370.547746
## iter 30 value 5265.213283
## iter 40 value 5241.215139
## iter 50 value 5229.833420
## iter 60 value 5220.882839
## iter 70 value 5197.739559
## iter 80 value 5120.996718
## iter 90 value 5029.203743
## iter 100 value 5012.711509
## final value 5012.711509
## stopped after 100 iterations
## # weights: 221
## initial value 7289.737966
## iter 10 value 5634.553356
## iter 20 value 5425.952790
## iter 30 value 5240.199337
## iter 40 value 5115.857597
## iter 50 value 5006.891952
## iter 60 value 4927.928523
## iter 70 value 4832.169442
## iter 80 value 4672.859151
## iter 90 value 3951.081596
## iter 100 value 3798.484431
## final value 3798.484431
## stopped after 100 iterations
## # weights: 331
## initial value 6298.035331
## iter 10 value 5470.177218
## iter 20 value 5373.344217
## iter 30 value 5280.713412
## iter 40 value 5184.828443
## iter 50 value 5162.871260
## iter 60 value 5024.389911
## iter 70 value 4823.797658
## iter 80 value 4441.558408
## iter 90 value 3888.155514
## iter 100 value 3583.621289
## final value 3583.621289
## stopped after 100 iterations
## # weights: 551
## initial value 9515.677817
## iter 10 value 5637.707632
## iter 20 value 5196.656240
## iter 30 value 5182.139262
## iter 40 value 5166.925578
## iter 50 value 5131.688468
## iter 60 value 4820.998390
## iter 70 value 4230.497906
## iter 80 value 3878.872060
## iter 90 value 3508.988663
## iter 100 value 3349.886175
## final value 3349.886175
## stopped after 100 iterations
## # weights: 771
## initial value 12262.757594
## iter 10 value 5338.300624
## iter 20 value 5290.037819
## iter 30 value 5281.911666
## iter 40 value 5279.537993
## iter 50 value 5278.708911
## iter 60 value 5275.530593
## iter 70 value 5273.626062
## iter 80 value 5273.449319
## iter 90 value 5273.418933
## iter 100 value 5273.132851
## final value 5273.132851
## stopped after 100 iterations
## # weights: 221
## initial value 8130.417557
## iter 10 value 5441.223758
## iter 20 value 5348.547086
## iter 30 value 5301.801866
## iter 40 value 5224.124080
## iter 50 value 5159.270973
## iter 60 value 5142.092282
## iter 70 value 5038.744123
## iter 80 value 4687.602362
## iter 90 value 4068.393684
## iter 100 value 3752.209078
## final value 3752.209078
## stopped after 100 iterations
## # weights: 331
## initial value 6789.940751
## iter 10 value 5828.503300
## iter 20 value 5319.955275
## iter 30 value 5259.534602
## iter 40 value 5228.223293
## iter 50 value 5169.642494
## iter 60 value 5146.234919
## iter 70 value 5132.383233
## iter 80 value 5123.182417
## iter 90 value 5105.148773
## iter 100 value 5093.434348
## final value 5093.434348
## stopped after 100 iterations
## # weights: 551
## initial value 9931.498561
## iter 10 value 5606.695441
## iter 20 value 5252.040874
## iter 30 value 5228.079724
## iter 40 value 5170.601395
## iter 50 value 5050.800364
## iter 60 value 4997.838110
## iter 70 value 4988.223297
## iter 80 value 4965.089046
## iter 90 value 4843.971904
## iter 100 value 4584.092429
## final value 4584.092429
## stopped after 100 iterations
## # weights: 771
## initial value 5993.923515
## iter 10 value 5651.488522
## iter 20 value 5206.429488
## iter 30 value 5177.369346
## iter 40 value 5157.607449
## iter 50 value 5111.819517
## iter 60 value 5085.513149
## iter 70 value 5032.791467
## iter 80 value 5002.089594
## iter 90 value 4978.042661
## iter 100 value 4970.370896
## final value 4970.370896
## stopped after 100 iterations
## # weights: 221
## initial value 6001.192249
## iter 10 value 5831.643987
## iter 20 value 5673.980941
## iter 30 value 5304.104590
## iter 40 value 5221.422321
## iter 50 value 5172.007336
## iter 60 value 5068.635060
## iter 70 value 5038.969415
## iter 80 value 4977.149016
## iter 90 value 4845.949872
## iter 100 value 4585.122560
## final value 4585.122560
## stopped after 100 iterations
## # weights: 331
## initial value 8598.513167
## iter 10 value 5830.225147
## iter 20 value 5823.738143
## iter 30 value 5310.763103
## iter 40 value 5309.076320
## iter 50 value 5233.145139
## iter 60 value 5227.981022
## iter 70 value 5225.301041
## iter 80 value 5126.895117
## iter 90 value 5112.086497
## iter 100 value 5106.153433
## final value 5106.153433
## stopped after 100 iterations
## # weights: 551
## initial value 7719.113823
## iter 10 value 5717.211894
## iter 20 value 5345.221032
## iter 30 value 5294.653976
## iter 40 value 5173.472582
## iter 50 value 5159.384127
## iter 60 value 5144.735077
## iter 70 value 5126.889711
## iter 80 value 5120.410588
## iter 90 value 5095.362328
## iter 100 value 5035.538761
## final value 5035.538761
## stopped after 100 iterations
## # weights: 771
## initial value 5921.163662
## iter 10 value 5832.005899
## iter 20 value 5795.602556
## iter 30 value 5419.215000
## iter 40 value 5120.697459
## iter 50 value 5110.017843
## iter 60 value 5105.865317
## iter 70 value 5099.848894
## iter 80 value 5047.302233
## iter 90 value 4894.789169
## iter 100 value 4706.273382
## final value 4706.273382
## stopped after 100 iterations
## # weights: 551
## initial value 9601.049679
## iter 10 value 8597.339241
## iter 20 value 8419.280458
## iter 30 value 8392.290174
## iter 40 value 8199.473271
## iter 50 value 8015.764802
## iter 60 value 7758.171208
## iter 70 value 7750.313576
## iter 80 value 7721.666428
## iter 90 value 7704.991074
## iter 100 value 7679.384189
## final value 7679.384189
## stopped after 100 iterations
# Print the fitted model: resampling setup, accuracy/kappa per tuning
# combination, and the selected size/decay values.
Adult_TDA_KDE_5.60.5_n1_NN1Fit0
## Neural Network
##
## 15260 samples
## 108 predictor
## 2 classes: ' <=50K', ' >50K'
##
## No pre-processing
## Resampling: Cross-Validated (3 fold)
## Summary of sample sizes: 10173, 10174, 10173
## Resampling results across tuning parameters:
##
## size decay Accuracy Kappa
## 2 0.3 0.8206424 0.4694431
## 2 0.5 0.8199864 0.5252703
## 2 0.7 0.7922017 0.3551556
## 3 0.3 0.8145472 0.4378342
## 3 0.5 0.8127116 0.4158853
## 3 0.7 0.8157295 0.4272610
## 5 0.3 0.8250326 0.5076988
## 5 0.5 0.8149434 0.4802810
## 5 0.7 0.8010483 0.3518693
## 7 0.3 0.8015065 0.3862242
## 7 0.5 0.7974439 0.3319158
## 7 0.7 0.8086492 0.4435108
##
## Accuracy was used to select the optimal model using the largest value.
## The final values used for the model were size = 5 and decay = 0.3.
# Show the per-fold accuracy and kappa of the selected model.
Adult_TDA_KDE_5.60.5_n1_NN1Fit0[["resample"]]
## Accuracy Kappa Resample
## 1 0.8057794 0.3726672 Fold1
## 2 0.8472577 0.5928479 Fold3
## 3 0.8220606 0.5575812 Fold2
# Keep the first column of the resample table (Accuracy) as a one-column
# data frame for the per-fold comparison.
# NOTE(review): the rows print in the order Fold1, Fold3, Fold2 - confirm the
# baseline table uses the same order before subtracting row by row.
ad_tda_kde_5.60.5_n1_nn1_fit_re <-
  Adult_TDA_KDE_5.60.5_n1_NN1Fit0[["resample"]][1]
# Print the weights of the selected network.
summary(Adult_TDA_KDE_5.60.5_n1_NN1Fit0)
## a 108-5-1 network with 551 weights
## options were - entropy fitting decay=0.3
## b->h1 i1->h1 i2->h1 i3->h1 i4->h1 i5->h1 i6->h1 i7->h1
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i8->h1 i9->h1 i10->h1 i11->h1 i12->h1 i13->h1 i14->h1 i15->h1
## 0.00 0.00 0.00 -0.01 0.00 0.00 0.00 0.00
## i16->h1 i17->h1 i18->h1 i19->h1 i20->h1 i21->h1 i22->h1 i23->h1
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i24->h1 i25->h1 i26->h1 i27->h1 i28->h1 i29->h1 i30->h1 i31->h1
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i32->h1 i33->h1 i34->h1 i35->h1 i36->h1 i37->h1 i38->h1 i39->h1
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i40->h1 i41->h1 i42->h1 i43->h1 i44->h1 i45->h1 i46->h1 i47->h1
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i48->h1 i49->h1 i50->h1 i51->h1 i52->h1 i53->h1 i54->h1 i55->h1
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i56->h1 i57->h1 i58->h1 i59->h1 i60->h1 i61->h1 i62->h1 i63->h1
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i64->h1 i65->h1 i66->h1 i67->h1 i68->h1 i69->h1 i70->h1 i71->h1
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i72->h1 i73->h1 i74->h1 i75->h1 i76->h1 i77->h1 i78->h1 i79->h1
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i80->h1 i81->h1 i82->h1 i83->h1 i84->h1 i85->h1 i86->h1 i87->h1
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i88->h1 i89->h1 i90->h1 i91->h1 i92->h1 i93->h1 i94->h1 i95->h1
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i96->h1 i97->h1 i98->h1 i99->h1 i100->h1 i101->h1 i102->h1 i103->h1
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i104->h1 i105->h1 i106->h1 i107->h1 i108->h1
## 0.00 0.00 0.00 0.00 0.00
## b->h2 i1->h2 i2->h2 i3->h2 i4->h2 i5->h2 i6->h2 i7->h2
## 0.01 0.53 0.00 0.00 0.00 0.00 -0.01 0.00
## i8->h2 i9->h2 i10->h2 i11->h2 i12->h2 i13->h2 i14->h2 i15->h2
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i16->h2 i17->h2 i18->h2 i19->h2 i20->h2 i21->h2 i22->h2 i23->h2
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i24->h2 i25->h2 i26->h2 i27->h2 i28->h2 i29->h2 i30->h2 i31->h2
## 0.00 0.00 -0.01 0.00 0.00 0.00 0.00 -0.01
## i32->h2 i33->h2 i34->h2 i35->h2 i36->h2 i37->h2 i38->h2 i39->h2
## 0.00 0.01 0.00 0.00 0.00 0.00 0.00 0.00
## i40->h2 i41->h2 i42->h2 i43->h2 i44->h2 i45->h2 i46->h2 i47->h2
## 0.00 0.00 0.00 0.00 0.00 0.00 -0.01 0.00
## i48->h2 i49->h2 i50->h2 i51->h2 i52->h2 i53->h2 i54->h2 i55->h2
## 0.00 0.00 0.00 0.00 0.01 0.00 0.00 0.00
## i56->h2 i57->h2 i58->h2 i59->h2 i60->h2 i61->h2 i62->h2 i63->h2
## 0.00 0.00 0.00 0.00 0.00 0.01 0.00 0.00
## i64->h2 i65->h2 i66->h2 i67->h2 i68->h2 i69->h2 i70->h2 i71->h2
## -0.12 0.70 0.27 0.00 0.00 0.00 0.00 0.00
## i72->h2 i73->h2 i74->h2 i75->h2 i76->h2 i77->h2 i78->h2 i79->h2
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i80->h2 i81->h2 i82->h2 i83->h2 i84->h2 i85->h2 i86->h2 i87->h2
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i88->h2 i89->h2 i90->h2 i91->h2 i92->h2 i93->h2 i94->h2 i95->h2
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i96->h2 i97->h2 i98->h2 i99->h2 i100->h2 i101->h2 i102->h2 i103->h2
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i104->h2 i105->h2 i106->h2 i107->h2 i108->h2
## 0.00 0.00 0.01 0.00 0.00
## b->h3 i1->h3 i2->h3 i3->h3 i4->h3 i5->h3 i6->h3 i7->h3
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i8->h3 i9->h3 i10->h3 i11->h3 i12->h3 i13->h3 i14->h3 i15->h3
## 0.00 0.00 0.00 0.01 0.00 0.00 0.00 0.00
## i16->h3 i17->h3 i18->h3 i19->h3 i20->h3 i21->h3 i22->h3 i23->h3
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i24->h3 i25->h3 i26->h3 i27->h3 i28->h3 i29->h3 i30->h3 i31->h3
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i32->h3 i33->h3 i34->h3 i35->h3 i36->h3 i37->h3 i38->h3 i39->h3
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i40->h3 i41->h3 i42->h3 i43->h3 i44->h3 i45->h3 i46->h3 i47->h3
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i48->h3 i49->h3 i50->h3 i51->h3 i52->h3 i53->h3 i54->h3 i55->h3
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i56->h3 i57->h3 i58->h3 i59->h3 i60->h3 i61->h3 i62->h3 i63->h3
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i64->h3 i65->h3 i66->h3 i67->h3 i68->h3 i69->h3 i70->h3 i71->h3
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i72->h3 i73->h3 i74->h3 i75->h3 i76->h3 i77->h3 i78->h3 i79->h3
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i80->h3 i81->h3 i82->h3 i83->h3 i84->h3 i85->h3 i86->h3 i87->h3
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i88->h3 i89->h3 i90->h3 i91->h3 i92->h3 i93->h3 i94->h3 i95->h3
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i96->h3 i97->h3 i98->h3 i99->h3 i100->h3 i101->h3 i102->h3 i103->h3
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i104->h3 i105->h3 i106->h3 i107->h3 i108->h3
## 0.00 0.00 0.00 0.00 0.00
## b->h4 i1->h4 i2->h4 i3->h4 i4->h4 i5->h4 i6->h4 i7->h4
## -0.09 -1.12 -0.13 -0.06 0.04 0.00 -0.01 0.10
## i8->h4 i9->h4 i10->h4 i11->h4 i12->h4 i13->h4 i14->h4 i15->h4
## 0.01 -0.04 0.00 0.00 -0.05 0.00 0.00 0.00
## i16->h4 i17->h4 i18->h4 i19->h4 i20->h4 i21->h4 i22->h4 i23->h4
## 0.00 0.00 0.00 -0.04 -0.06 -0.24 0.00 0.23
## i24->h4 i25->h4 i26->h4 i27->h4 i28->h4 i29->h4 i30->h4 i31->h4
## 0.12 0.00 -0.03 -0.03 -1.51 -0.05 0.00 0.22
## i32->h4 i33->h4 i34->h4 i35->h4 i36->h4 i37->h4 i38->h4 i39->h4
## 0.00 -0.25 0.00 -0.02 -0.13 0.01 0.00 0.05
## i40->h4 i41->h4 i42->h4 i43->h4 i44->h4 i45->h4 i46->h4 i47->h4
## 0.25 0.01 -0.03 0.01 0.02 0.00 -0.13 0.00
## i48->h4 i49->h4 i50->h4 i51->h4 i52->h4 i53->h4 i54->h4 i55->h4
## 0.04 -0.18 0.00 0.22 -0.25 0.00 -0.08 -0.05
## i56->h4 i57->h4 i58->h4 i59->h4 i60->h4 i61->h4 i62->h4 i63->h4
## 0.07 -0.05 0.02 0.00 0.00 -0.06 -0.17 0.08
## i64->h4 i65->h4 i66->h4 i67->h4 i68->h4 i69->h4 i70->h4 i71->h4
## 0.04 0.32 0.94 -0.06 0.00 0.00 0.00 0.00
## i72->h4 i73->h4 i74->h4 i75->h4 i76->h4 i77->h4 i78->h4 i79->h4
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i80->h4 i81->h4 i82->h4 i83->h4 i84->h4 i85->h4 i86->h4 i87->h4
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i88->h4 i89->h4 i90->h4 i91->h4 i92->h4 i93->h4 i94->h4 i95->h4
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i96->h4 i97->h4 i98->h4 i99->h4 i100->h4 i101->h4 i102->h4 i103->h4
## 0.00 0.01 0.00 0.00 0.00 0.00 0.00 0.00
## i104->h4 i105->h4 i106->h4 i107->h4 i108->h4
## 0.00 0.00 -0.04 0.00 0.00
## b->h5 i1->h5 i2->h5 i3->h5 i4->h5 i5->h5 i6->h5 i7->h5
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i8->h5 i9->h5 i10->h5 i11->h5 i12->h5 i13->h5 i14->h5 i15->h5
## 0.00 0.00 0.00 -0.03 0.00 0.00 0.00 0.00
## i16->h5 i17->h5 i18->h5 i19->h5 i20->h5 i21->h5 i22->h5 i23->h5
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i24->h5 i25->h5 i26->h5 i27->h5 i28->h5 i29->h5 i30->h5 i31->h5
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i32->h5 i33->h5 i34->h5 i35->h5 i36->h5 i37->h5 i38->h5 i39->h5
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i40->h5 i41->h5 i42->h5 i43->h5 i44->h5 i45->h5 i46->h5 i47->h5
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i48->h5 i49->h5 i50->h5 i51->h5 i52->h5 i53->h5 i54->h5 i55->h5
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i56->h5 i57->h5 i58->h5 i59->h5 i60->h5 i61->h5 i62->h5 i63->h5
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i64->h5 i65->h5 i66->h5 i67->h5 i68->h5 i69->h5 i70->h5 i71->h5
## -0.03 -0.05 0.00 0.00 0.00 0.00 0.00 0.00
## i72->h5 i73->h5 i74->h5 i75->h5 i76->h5 i77->h5 i78->h5 i79->h5
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i80->h5 i81->h5 i82->h5 i83->h5 i84->h5 i85->h5 i86->h5 i87->h5
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i88->h5 i89->h5 i90->h5 i91->h5 i92->h5 i93->h5 i94->h5 i95->h5
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i96->h5 i97->h5 i98->h5 i99->h5 i100->h5 i101->h5 i102->h5 i103->h5
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i104->h5 i105->h5 i106->h5 i107->h5 i108->h5
## 0.00 0.00 0.00 0.00 0.00
## b->o h1->o h2->o h3->o h4->o h5->o
## 0.18 -0.01 -1.85 0.25 1.73 0.13
# Variable-importance plot for the fitted model, showing up to 50 features.
# The plot title previously misspelled "Assisted".
vip(Adult_TDA_KDE_5.60.5_n1_NN1Fit0, num_features = 50) +
  ggtitle("Adult_TDA_KDE_5.60.5_n1_NN1Fit TDA-Assisted NN")

# Predict the class for every row of the test data with the node-1 fit.
pred0 <- predict(
  Adult_TDA_KDE_5.60.5_n1_NN1Fit0,
  newdata = adult.one_hot_df4Test
)
# Compare predictions with the observed adult_df1 labels of the test data.
ad_tda_kde_5.60.5_n1_nn1_cf0 <- confusionMatrix(
  data = pred0,
  reference = as.factor(adult.one_hot_df4Test$adult_df1)
)
ad_tda_kde_5.60.5_n1_nn1_cf0
## Confusion Matrix and Statistics
##
## Reference
## Prediction <=50K >50K
## <=50K 7095 1645
## >50K 321 707
##
## Accuracy : 0.7987
## 95% CI : (0.7906, 0.8066)
## No Information Rate : 0.7592
## P-Value [Acc > NIR] : < 2.2e-16
##
## Kappa : 0.3185
##
## Mcnemar's Test P-Value : < 2.2e-16
##
## Sensitivity : 0.9567
## Specificity : 0.3006
## Pos Pred Value : 0.8118
## Neg Pred Value : 0.6877
## Prevalence : 0.7592
## Detection Rate : 0.7264
## Detection Prevalence : 0.8948
## Balanced Accuracy : 0.6287
##
## 'Positive' Class : <=50K
##
# Second display of the same confusion matrix object; the printed output is
# identical to the one directly above.
ad_tda_kde_5.60.5_n1_nn1_cf0
## Confusion Matrix and Statistics
##
## Reference
## Prediction <=50K >50K
## <=50K 7095 1645
## >50K 321 707
##
## Accuracy : 0.7987
## 95% CI : (0.7906, 0.8066)
## No Information Rate : 0.7592
## P-Value [Acc > NIR] : < 2.2e-16
##
## Kappa : 0.3185
##
## Mcnemar's Test P-Value : < 2.2e-16
##
## Sensitivity : 0.9567
## Specificity : 0.3006
## Pos Pred Value : 0.8118
## Neg Pred Value : 0.6877
## Prevalence : 0.7592
## Detection Rate : 0.7264
## Detection Prevalence : 0.8948
## Balanced Accuracy : 0.6287
##
## 'Positive' Class : <=50K
##
# Show the overall statistics (accuracy, kappa, accuracy bounds, p-values).
ad_tda_kde_5.60.5_n1_nn1_cf0[["overall"]]
## Accuracy Kappa AccuracyLower AccuracyUpper AccuracyNull
## 7.987305e-01 3.185308e-01 7.906396e-01 8.066430e-01 7.592138e-01
## AccuracyPValue McnemarPValue
## 6.604999e-21 1.260623e-195
# Keep the first entry of $overall (Accuracy) for the test-set comparison below.
ad_tda_kde_5.60.5_n1_nn1_cf0_ov_acc <- ad_tda_kde_5.60.5_n1_nn1_cf0[["overall"]][1]
# Display the per-class statistics.
ad_tda_kde_5.60.5_n1_nn1_cf0[["byClass"]]
## Sensitivity Specificity Pos Pred Value
## 0.9567152 0.3005952 0.8117849
## Neg Pred Value Precision Recall
## 0.6877432 0.8117849 0.9567152
## F1 Prevalence Detection Rate
## 0.8783115 0.7592138 0.7263514
## Detection Prevalence Balanced Accuracy
## 0.8947584 0.6286552
# Entries 5 to 7 of $byClass are Precision, Recall and F1 (see the printout above).
ad_tda_kde_5.60.5_n1_nn1_cf0_pre_rec_f1 <- ad_tda_kde_5.60.5_n1_nn1_cf0[["byClass"]][5:7]
###### Conduct initial Bayesian tests of non-tda-assisted NN vs. tda-assisted NN classifiers
### 3-fold diff
# Row-wise difference of resample accuracies: ad_nn1_fit_re minus the TDA KDE
# node-1 fit. Negative values mean the second (TDA) fit scored higher.
# NOTE(review): the subtraction pairs rows by position - confirm both resample
# tables list the folds in the same order.
diff_tda_kde_5.60.5_nn1_n1_3_fold <-
  ad_nn1_fit_re - ad_tda_kde_5.60.5_n1_nn1_fit_re
diff_tda_kde_5.60.5_nn1_n1_3_fold
## Accuracy
## 1 0.002171069
## 2 0.002834414
## 3 -0.020665454
## Bayesian Tests 3-fold diff
# Bayesian Sign Test on the per-fold differences; -0.01 and 0.01 are passed as
# the lower and upper bounds used for the "Rope" region in the result.
bst_tda_kde_5.60.5_nn1.n1_3_fold <- BayesianSignTest(
  as.matrix(diff_tda_kde_5.60.5_nn1_n1_3_fold),
  -0.01,
  0.01
)
bst_tda_kde_5.60.5_nn1.n1_3_fold
## $probLeft
## [1] 0.25
##
## $probRope
## [1] 0.75
##
## $probRight
## [1] 0
# Odds Left Bayesian Sign Test: probLeft divided by probRight.
# The ratio is not finite when probRight is 0 (it prints as Inf below).
bst_tda_kde_5.60.5_nn1.n1_3_fold_odds.left <-
  bst_tda_kde_5.60.5_nn1.n1_3_fold[["probLeft"]] /
  bst_tda_kde_5.60.5_nn1.n1_3_fold[["probRight"]]
bst_tda_kde_5.60.5_nn1.n1_3_fold_odds.left
## [1] Inf
# Bayesian Signed Rank Test on the same per-fold differences, with the same
# -0.01 / 0.01 bounds as the sign test.
bsr_tda_kde_5.60.5_nn1.n1_3_fold <- BayesianSignedRank(
  as.matrix(diff_tda_kde_5.60.5_nn1_n1_3_fold),
  -0.01,
  0.01
)
bsr_tda_kde_5.60.5_nn1.n1_3_fold
## $winLeft
## [1] 0.09123333
##
## $winRope
## [1] 0.9087667
##
## $winRight
## [1] 0
# Bayesian Correlated Test on the per-fold differences.
# NOTE(review): the second argument (0.1) is presumably the correlation term of
# the correlated t-test; for 3-fold differences a value of 1/3 may be what is
# intended - confirm against the function definition.
bct_tda_kde_5.60.5_nn1.n1_3_fold <- correlatedBayesianTtest(
  as.matrix(diff_tda_kde_5.60.5_nn1_n1_3_fold),
  0.1,
  -0.01,
  0.01
)
bct_tda_kde_5.60.5_nn1.n1_3_fold
## $left
## [1] 0.3228351
##
## $rope
## [1] 0.5621235
##
## $right
## [1] 0.1150414
# Rope Plot: plot the rope() result for the per-fold differences, using the
# interval from -0.01 to 0.01.
plot(
  rope(diff_tda_kde_5.60.5_nn1_n1_3_fold, c(-0.01, 0.01))
)

#BayesFactor
#bf_tda_kde_5.60.5_nn1.n1_3_fold = ttestBF(x = as.matrix(diff_tda_kde_5.60.5_nn1_n1_3_fold))
#bf_tda_kde_5.60.5_nn1.n1_3_fold
# One-sample t-test of the per-fold differences (default null: mean equal to 0).
t.test(
  as.matrix(diff_tda_kde_5.60.5_nn1_n1_3_fold)
)
##
## One Sample t-test
##
## data: as.matrix(diff_tda_kde_5.60.5_nn1_n1_3_fold)
## t = -0.67572, df = 2, p-value = 0.5689
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
## -0.03845844 0.02801846
## sample estimates:
## mean of x
## -0.005219991
### Test set diff
# Single test-set accuracy difference: nn1_cf_ov_acc minus the TDA KDE node-1
# accuracy stored above. A positive value means the first accuracy is higher.
diff_tda_kde_5.60.5_nn1.n1_test <-
  nn1_cf_ov_acc - ad_tda_kde_5.60.5_n1_nn1_cf0_ov_acc
diff_tda_kde_5.60.5_nn1.n1_test
## Accuracy
## 0.006347256
## Bayesian Tests Test set diff
# Bayesian Sign Test on the single test-set difference, bounds -0.01 / 0.01.
bst_tda_kde_5.60.5_nn1.n1_test <- BayesianSignTest(
  as.matrix(diff_tda_kde_5.60.5_nn1.n1_test),
  -0.01,
  0.01
)
bst_tda_kde_5.60.5_nn1.n1_test
## $probLeft
## [1] 0
##
## $probRope
## [1] 1
##
## $probRight
## [1] 0
# Odds Left Bayesian Sign Test: probLeft divided by probRight.
# Both probabilities are 0 here, so the ratio prints as NaN below.
bst_tda_kde_5.60.5_nn1.n1_test_odds.left <-
  bst_tda_kde_5.60.5_nn1.n1_test[["probLeft"]] /
  bst_tda_kde_5.60.5_nn1.n1_test[["probRight"]]
bst_tda_kde_5.60.5_nn1.n1_test_odds.left
## [1] NaN
# Bayesian Signed Rank Test on the single test-set difference, bounds -0.01 / 0.01.
bsr_tda_kde_5.60.5_nn1.n1_test <- BayesianSignedRank(
  as.matrix(diff_tda_kde_5.60.5_nn1.n1_test),
  -0.01,
  0.01
)
bsr_tda_kde_5.60.5_nn1.n1_test
## $winLeft
## [1] 0
##
## $winRope
## [1] 1
##
## $winRight
## [1] 0
# Bayesian Correlated Test on the single test-set difference.
# With only one difference supplied, all three entries print as NA below.
bct_tda_kde_5.60.5_nn1.n1_test <- correlatedBayesianTtest(
  as.matrix(diff_tda_kde_5.60.5_nn1.n1_test),
  0.1,
  -0.01,
  0.01
)
bct_tda_kde_5.60.5_nn1.n1_test
## $left
## [1] NA
##
## $rope
## [1] NA
##
## $right
## [1] NA
# Rope Plot
#plot(rope(diff_tda_kde_5.60.5_nn1.n1_test)))
#BayesFactor
#bf_tda_kde_5.60.5_nn1.n1_test = ttestBF(x = as.matrix(diff_tda_kde_5.60.5_nn1.n1_test))
#bf_tda_kde_5.60.5_nn1.n1_test
#t_test
#t.test(as.matrix(diff_tda_kde_5.60.5_nn1.n1_test))
## Node2
# Neural Network 1
# Tune an nnet model through caret::train over the nn1Grid candidates, using
# the resampling scheme in fitControl; the candidate with the highest
# Accuracy is selected.
# NOTE(review): this Node2 fit is trained on the `.n3.vec` data set, the same
# object the Node3 fit uses -- confirm whether a Node2-specific data set was
# intended here.
# NOTE(review): `Importance` is forwarded through train()'s `...`; it does not
# look like an nnet() argument -- TODO confirm it has any effect.
Adult_TDA_KDE_5.60.5_n2_NN1Fit0 <- train(
  as.factor(adult_df1) ~ .,
  data = tda.m_kde_adult_5.60.5.n3.vec,
  Importance = TRUE,
  method = "nnet",
  trControl = fitControl,
  tuneGrid = nn1Grid,
  metric = "Accuracy"
)
## # weights: 221
## initial value 5193.340449
## iter 10 value 4936.486372
## iter 20 value 4844.741276
## iter 30 value 4830.134491
## iter 40 value 4794.468352
## iter 50 value 4779.590392
## iter 60 value 4714.816033
## iter 70 value 4705.687500
## iter 80 value 4680.198096
## iter 90 value 4372.951538
## iter 100 value 3941.728216
## final value 3941.728216
## stopped after 100 iterations
## # weights: 331
## initial value 7147.332727
## iter 10 value 5158.915513
## iter 20 value 4893.940032
## iter 30 value 4823.342876
## iter 40 value 4793.924642
## iter 50 value 4736.573509
## iter 60 value 4613.140184
## iter 70 value 4004.594248
## iter 80 value 3662.764537
## iter 90 value 3595.136690
## iter 100 value 3239.958251
## final value 3239.958251
## stopped after 100 iterations
## # weights: 551
## initial value 7324.448618
## iter 10 value 5066.739781
## iter 20 value 4841.833049
## iter 30 value 4797.676440
## iter 40 value 4795.580504
## iter 50 value 4794.822404
## iter 60 value 4789.260131
## iter 70 value 4779.284198
## iter 80 value 4706.508311
## iter 90 value 4681.143599
## iter 100 value 4673.577097
## final value 4673.577097
## stopped after 100 iterations
## # weights: 771
## initial value 7151.551467
## iter 10 value 5123.034709
## iter 20 value 4763.933946
## iter 30 value 4748.088757
## iter 40 value 4711.083186
## iter 50 value 4311.244498
## iter 60 value 3721.104523
## iter 70 value 3624.054322
## iter 80 value 3591.536906
## iter 90 value 3462.683481
## iter 100 value 3254.113763
## final value 3254.113763
## stopped after 100 iterations
## # weights: 221
## initial value 6838.078807
## iter 10 value 5160.912642
## iter 20 value 5077.777187
## iter 30 value 4941.969937
## iter 40 value 4834.825668
## iter 50 value 4818.903548
## iter 60 value 4726.838183
## iter 70 value 4712.533102
## iter 80 value 4707.113203
## iter 90 value 4681.248898
## iter 100 value 4664.040955
## final value 4664.040955
## stopped after 100 iterations
## # weights: 331
## initial value 11964.773668
## iter 10 value 5130.180590
## iter 20 value 4832.842977
## iter 30 value 4807.260122
## iter 40 value 4805.439588
## iter 50 value 4789.400369
## iter 60 value 4759.969474
## iter 70 value 4716.229271
## iter 80 value 4600.564784
## iter 90 value 4583.165721
## iter 100 value 4490.959696
## final value 4490.959696
## stopped after 100 iterations
## # weights: 551
## initial value 5273.529226
## iter 10 value 5067.295964
## iter 20 value 4839.782196
## iter 30 value 4832.813048
## iter 40 value 4801.888379
## iter 50 value 4732.213867
## iter 60 value 4411.982236
## iter 70 value 3954.920346
## iter 80 value 3302.558754
## iter 90 value 3192.890814
## iter 100 value 3084.061866
## final value 3084.061866
## stopped after 100 iterations
## # weights: 771
## initial value 5294.706477
## iter 10 value 5136.277202
## iter 20 value 5134.544425
## iter 30 value 4813.788364
## iter 40 value 4768.744374
## iter 50 value 4721.001401
## iter 60 value 4703.302866
## iter 70 value 4688.402379
## iter 80 value 4671.581772
## iter 90 value 4646.397627
## iter 100 value 4538.430739
## final value 4538.430739
## stopped after 100 iterations
## # weights: 221
## initial value 7904.679838
## iter 10 value 4815.944666
## iter 20 value 4803.940466
## iter 30 value 4803.540328
## iter 40 value 4796.915599
## iter 50 value 4786.035359
## iter 60 value 4778.213282
## iter 70 value 4742.426035
## iter 80 value 4697.225228
## iter 90 value 4596.155150
## iter 100 value 4146.536997
## final value 4146.536997
## stopped after 100 iterations
## # weights: 331
## initial value 6397.609936
## iter 10 value 4821.627161
## iter 20 value 4801.658705
## iter 30 value 4754.086448
## iter 40 value 4703.677015
## iter 50 value 4619.991050
## iter 60 value 4417.985333
## iter 70 value 4330.898517
## iter 80 value 3965.917382
## iter 90 value 3701.783502
## iter 100 value 3236.982201
## final value 3236.982201
## stopped after 100 iterations
## # weights: 551
## initial value 7441.814324
## iter 10 value 5095.197020
## iter 20 value 4823.080663
## iter 30 value 4733.917656
## iter 40 value 4727.343180
## iter 50 value 4713.973545
## iter 60 value 4699.670551
## iter 70 value 4696.552468
## iter 80 value 4674.110204
## iter 90 value 4669.871935
## iter 100 value 4646.297447
## final value 4646.297447
## stopped after 100 iterations
## # weights: 771
## initial value 6868.844668
## iter 10 value 4818.271878
## iter 20 value 4801.885403
## iter 30 value 4798.159632
## iter 40 value 4797.295944
## iter 50 value 4777.870453
## iter 60 value 4759.718274
## iter 70 value 4322.111199
## iter 80 value 4052.290787
## iter 90 value 3877.756160
## iter 100 value 3443.280164
## final value 3443.280164
## stopped after 100 iterations
## # weights: 221
## initial value 5341.705882
## iter 10 value 5160.126821
## final value 5160.125276
## converged
## # weights: 331
## initial value 5356.349022
## iter 10 value 4926.333955
## iter 20 value 4791.889137
## iter 30 value 4706.373182
## iter 40 value 4646.062434
## iter 50 value 4626.187041
## iter 60 value 4616.193243
## iter 70 value 4124.730453
## iter 80 value 3918.607043
## iter 90 value 3825.476854
## iter 100 value 3296.623988
## final value 3296.623988
## stopped after 100 iterations
## # weights: 551
## initial value 5582.218251
## iter 10 value 5097.532608
## iter 20 value 4847.037718
## iter 30 value 4793.832308
## iter 40 value 4788.268120
## iter 50 value 4776.004739
## iter 60 value 4764.702499
## iter 70 value 4690.542387
## iter 80 value 4389.832050
## iter 90 value 3925.222631
## iter 100 value 3704.599487
## final value 3704.599487
## stopped after 100 iterations
## # weights: 771
## initial value 5419.344378
## iter 10 value 4988.139885
## iter 20 value 4762.631292
## iter 30 value 4719.358744
## iter 40 value 4631.668806
## iter 50 value 4582.929311
## iter 60 value 4571.341273
## iter 70 value 4565.228720
## iter 80 value 4559.048608
## iter 90 value 4555.473992
## iter 100 value 4553.726889
## final value 4553.726889
## stopped after 100 iterations
## # weights: 221
## initial value 6400.133975
## iter 10 value 5143.324076
## iter 20 value 4821.649184
## iter 30 value 4797.894805
## iter 40 value 4741.454961
## iter 50 value 4354.089069
## iter 60 value 4221.587732
## iter 70 value 4125.257152
## iter 80 value 4041.612482
## iter 90 value 3967.248627
## iter 100 value 3797.094306
## final value 3797.094306
## stopped after 100 iterations
## # weights: 331
## initial value 5529.957657
## iter 10 value 5052.941043
## iter 20 value 4848.553977
## iter 30 value 4709.033788
## iter 40 value 4641.020100
## iter 50 value 4511.299269
## iter 60 value 4186.138737
## iter 70 value 3566.320316
## iter 80 value 3414.131038
## iter 90 value 3247.262659
## iter 100 value 3153.837504
## final value 3153.837504
## stopped after 100 iterations
## # weights: 551
## initial value 5333.432842
## iter 10 value 4982.494817
## iter 20 value 4826.021216
## iter 30 value 4784.417052
## iter 40 value 4754.122634
## iter 50 value 4712.852057
## iter 60 value 4620.262928
## iter 70 value 4590.756844
## iter 80 value 4216.767024
## iter 90 value 3670.653749
## iter 100 value 3332.828725
## final value 3332.828725
## stopped after 100 iterations
## # weights: 771
## initial value 5238.445550
## iter 10 value 4852.480752
## iter 20 value 4762.834198
## iter 30 value 4735.047808
## iter 40 value 4712.507362
## iter 50 value 4451.755393
## iter 60 value 4059.211299
## iter 70 value 3743.523197
## iter 80 value 3450.717152
## iter 90 value 3415.328569
## iter 100 value 3390.430494
## final value 3390.430494
## stopped after 100 iterations
## # weights: 221
## initial value 5689.631784
## iter 10 value 4941.695197
## iter 20 value 4788.243538
## iter 30 value 4783.501385
## iter 40 value 4761.743598
## iter 50 value 4669.543709
## iter 60 value 4441.320855
## iter 70 value 4212.746885
## iter 80 value 3960.157138
## iter 90 value 3822.855600
## iter 100 value 3539.608561
## final value 3539.608561
## stopped after 100 iterations
## # weights: 331
## initial value 5657.018857
## iter 10 value 5162.438775
## iter 20 value 5160.812570
## iter 30 value 5160.767320
## iter 40 value 5137.002767
## iter 50 value 5117.281323
## iter 60 value 4797.232724
## iter 70 value 4773.863013
## iter 80 value 4483.113148
## iter 90 value 3551.311945
## iter 100 value 3296.557869
## final value 3296.557869
## stopped after 100 iterations
## # weights: 551
## initial value 5350.486777
## iter 10 value 5076.776642
## iter 20 value 4840.826813
## iter 30 value 4791.753361
## iter 40 value 4781.103999
## iter 50 value 4759.041674
## iter 60 value 4723.434649
## iter 70 value 4712.893127
## iter 80 value 4709.675263
## iter 90 value 4654.553912
## iter 100 value 4635.112804
## final value 4635.112804
## stopped after 100 iterations
## # weights: 771
## initial value 7650.707541
## iter 10 value 5183.093941
## iter 20 value 4998.571804
## iter 30 value 4811.252854
## iter 40 value 4796.952232
## iter 50 value 4786.777092
## iter 60 value 4758.074763
## iter 70 value 4730.594379
## iter 80 value 4698.267288
## iter 90 value 4363.759523
## iter 100 value 3862.807015
## final value 3862.807015
## stopped after 100 iterations
## # weights: 221
## initial value 6037.956388
## iter 10 value 5107.718980
## iter 20 value 4733.300557
## iter 30 value 4711.567219
## iter 40 value 4707.946793
## iter 50 value 4697.979141
## iter 60 value 4675.726429
## iter 70 value 4630.521228
## iter 80 value 4536.668557
## iter 90 value 3978.526732
## iter 100 value 3740.321946
## final value 3740.321946
## stopped after 100 iterations
## # weights: 331
## initial value 8216.978974
## iter 10 value 5106.709779
## iter 20 value 4792.735224
## iter 30 value 4747.378126
## iter 40 value 4292.090004
## iter 50 value 4020.017917
## iter 60 value 3925.577641
## iter 70 value 3872.731754
## iter 80 value 3772.225544
## iter 90 value 3401.112503
## iter 100 value 3225.718291
## final value 3225.718291
## stopped after 100 iterations
## # weights: 551
## initial value 5330.839724
## iter 10 value 5158.508668
## iter 20 value 4807.851912
## iter 30 value 4707.242807
## iter 40 value 4680.219031
## iter 50 value 4660.352779
## iter 60 value 4656.808708
## iter 70 value 4635.733650
## iter 80 value 4618.541823
## iter 90 value 4610.894474
## iter 100 value 4465.335788
## final value 4465.335788
## stopped after 100 iterations
## # weights: 771
## initial value 7217.185246
## iter 10 value 4862.247406
## iter 20 value 4795.674221
## iter 30 value 4731.386274
## iter 40 value 4552.366713
## iter 50 value 4443.204867
## iter 60 value 4103.096979
## iter 70 value 3559.348937
## iter 80 value 3375.076505
## iter 90 value 3276.777831
## iter 100 value 3173.772519
## final value 3173.772519
## stopped after 100 iterations
## # weights: 221
## initial value 5762.169066
## iter 10 value 5158.125798
## iter 20 value 4826.106826
## iter 30 value 4778.151924
## iter 40 value 4654.168871
## iter 50 value 3927.748750
## iter 60 value 3312.012336
## iter 70 value 3145.847559
## iter 80 value 3073.331961
## iter 90 value 3029.938307
## iter 100 value 3021.247846
## final value 3021.247846
## stopped after 100 iterations
## # weights: 331
## initial value 5220.143561
## iter 10 value 5160.723662
## iter 20 value 5159.722954
## iter 30 value 4952.593014
## iter 40 value 4744.635247
## iter 50 value 4719.890708
## iter 60 value 4704.506425
## iter 70 value 4701.153289
## iter 80 value 4580.711443
## iter 90 value 4330.909738
## iter 100 value 4017.274628
## final value 4017.274628
## stopped after 100 iterations
## # weights: 551
## initial value 5213.083113
## iter 10 value 4950.741088
## iter 20 value 4936.925383
## iter 30 value 4788.798323
## iter 40 value 4783.187105
## iter 50 value 4781.699014
## iter 60 value 4778.775773
## iter 70 value 4762.701892
## iter 80 value 4750.201202
## iter 90 value 4726.949517
## iter 100 value 4690.064954
## final value 4690.064954
## stopped after 100 iterations
## # weights: 771
## initial value 7343.229216
## iter 10 value 5123.928280
## iter 20 value 4827.110087
## iter 30 value 4760.516747
## iter 40 value 4668.119112
## iter 50 value 4621.684796
## iter 60 value 4590.786033
## iter 70 value 4519.177035
## iter 80 value 4505.074832
## iter 90 value 4438.961762
## iter 100 value 4218.812791
## final value 4218.812791
## stopped after 100 iterations
## # weights: 221
## initial value 5650.898726
## iter 10 value 5125.818662
## iter 20 value 4744.312223
## iter 30 value 4688.430884
## iter 40 value 4592.034474
## iter 50 value 4168.553132
## iter 60 value 3539.111899
## iter 70 value 3260.510140
## iter 80 value 3189.399400
## iter 90 value 3147.536207
## iter 100 value 3111.359474
## final value 3111.359474
## stopped after 100 iterations
## # weights: 331
## initial value 5390.509193
## iter 10 value 5133.195136
## iter 20 value 4802.421460
## iter 30 value 4784.097060
## iter 40 value 4782.973256
## iter 50 value 4771.048145
## iter 60 value 4678.901752
## iter 70 value 4675.218484
## iter 80 value 4668.407536
## iter 90 value 4662.533935
## iter 100 value 4660.091917
## final value 4660.091917
## stopped after 100 iterations
## # weights: 551
## initial value 7580.374538
## iter 10 value 5106.253122
## iter 20 value 4888.228691
## iter 30 value 4810.550139
## iter 40 value 4801.894520
## iter 50 value 4711.654011
## iter 60 value 4688.927610
## iter 70 value 4685.655642
## iter 80 value 4667.843694
## iter 90 value 4622.696503
## iter 100 value 4534.904029
## final value 4534.904029
## stopped after 100 iterations
## # weights: 771
## initial value 6890.697079
## iter 10 value 5149.981570
## iter 20 value 4862.015161
## iter 30 value 4820.558610
## iter 40 value 4802.707040
## iter 50 value 4704.379172
## iter 60 value 4679.051090
## iter 70 value 4678.896736
## iter 80 value 4671.380618
## iter 90 value 4668.331269
## iter 100 value 4666.111040
## final value 4666.111040
## stopped after 100 iterations
## # weights: 331
## initial value 8295.375385
## iter 10 value 7456.265098
## iter 20 value 7383.345089
## iter 30 value 7069.446093
## iter 40 value 7037.460188
## iter 50 value 7024.961832
## iter 60 value 7022.798127
## iter 70 value 7022.331042
## iter 80 value 7014.293025
## iter 90 value 6729.202377
## iter 100 value 5779.900414
## final value 5779.900414
## stopped after 100 iterations
# Print the fitted caret object: resampled Accuracy/Kappa for every size/decay
# combination in the tuning grid and the combination that was selected.
Adult_TDA_KDE_5.60.5_n2_NN1Fit0
## Neural Network
##
## 13266 samples
## 108 predictor
## 2 classes: ' <=50K', ' >50K'
##
## No pre-processing
## Resampling: Cross-Validated (3 fold)
## Summary of sample sizes: 8844, 8844, 8844
## Resampling results across tuning parameters:
##
## size decay Accuracy Kappa
## 2 0.3 0.7721996 0.3282817
## 2 0.5 0.8033318 0.4517339
## 2 0.7 0.8002412 0.4691387
## 3 0.3 0.8249661 0.5626807
## 3 0.5 0.7913463 0.4642706
## 3 0.7 0.8028795 0.4715081
## 5 0.3 0.7689582 0.2538023
## 5 0.5 0.8074778 0.4525695
## 5 0.7 0.7708428 0.2524926
## 7 0.3 0.7979044 0.4159694
## 7 0.5 0.7947384 0.3558387
## 7 0.7 0.7801900 0.3189116
##
## Accuracy was used to select the optimal model using the largest value.
## The final values used for the model were size = 3 and decay = 0.3.
# Per-fold Accuracy and Kappa of the selected model (one row per CV fold).
Adult_TDA_KDE_5.60.5_n2_NN1Fit0$resample
## Accuracy Kappa Resample
## 1 0.8256445 0.5347792 Fold3
## 2 0.8276798 0.5828865 Fold2
## 3 0.8215739 0.5703763 Fold1
# Keep the fold-wise Accuracy column (first column of $resample) as a
# one-column data frame for the paired comparison with the baseline model.
ad_tda_kde_5.60.5_n2_nn1_fit_re <-
  Adult_TDA_KDE_5.60.5_n2_NN1Fit0$resample["Accuracy"]
# Print the architecture and the fitted weights of the final network.
summary(Adult_TDA_KDE_5.60.5_n2_NN1Fit0)
## a 108-3-1 network with 331 weights
## options were - entropy fitting decay=0.3
## b->h1 i1->h1 i2->h1 i3->h1 i4->h1 i5->h1 i6->h1 i7->h1
## 0.01 -0.83 0.05 0.11 0.03 0.00 -0.19 -0.02
## i8->h1 i9->h1 i10->h1 i11->h1 i12->h1 i13->h1 i14->h1 i15->h1
## 0.07 -0.05 0.00 0.00 0.00 -0.06 -0.11 0.00
## i16->h1 i17->h1 i18->h1 i19->h1 i20->h1 i21->h1 i22->h1 i23->h1
## 0.00 0.00 0.00 -0.81 0.17 0.60 0.00 -0.30
## i24->h1 i25->h1 i26->h1 i27->h1 i28->h1 i29->h1 i30->h1 i31->h1
## -0.26 0.00 0.00 0.78 0.09 -0.49 0.00 -0.60
## i32->h1 i33->h1 i34->h1 i35->h1 i36->h1 i37->h1 i38->h1 i39->h1
## 0.00 1.17 -0.22 0.15 0.05 0.33 0.00 0.04
## i40->h1 i41->h1 i42->h1 i43->h1 i44->h1 i45->h1 i46->h1 i47->h1
## 0.24 -0.17 -0.14 -0.21 0.14 -0.06 -0.72 0.04
## i48->h1 i49->h1 i50->h1 i51->h1 i52->h1 i53->h1 i54->h1 i55->h1
## 0.46 0.11 -0.12 0.36 0.48 -0.02 0.26 -0.03
## i56->h1 i57->h1 i58->h1 i59->h1 i60->h1 i61->h1 i62->h1 i63->h1
## -1.05 -0.06 0.24 0.00 0.00 -0.17 -0.98 0.99
## i64->h1 i65->h1 i66->h1 i67->h1 i68->h1 i69->h1 i70->h1 i71->h1
## 0.13 0.40 -1.38 0.12 0.19 0.00 0.00 0.00
## i72->h1 i73->h1 i74->h1 i75->h1 i76->h1 i77->h1 i78->h1 i79->h1
## 0.15 0.00 0.00 -0.07 0.00 0.00 0.06 0.00
## i80->h1 i81->h1 i82->h1 i83->h1 i84->h1 i85->h1 i86->h1 i87->h1
## 0.13 -0.10 0.00 0.00 0.00 0.00 0.00 0.12
## i88->h1 i89->h1 i90->h1 i91->h1 i92->h1 i93->h1 i94->h1 i95->h1
## 0.00 0.00 0.00 0.00 0.00 0.12 0.00 0.00
## i96->h1 i97->h1 i98->h1 i99->h1 i100->h1 i101->h1 i102->h1 i103->h1
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i104->h1 i105->h1 i106->h1 i107->h1 i108->h1
## 0.00 0.00 -0.72 0.00 0.00
## b->h2 i1->h2 i2->h2 i3->h2 i4->h2 i5->h2 i6->h2 i7->h2
## 2.23 0.17 0.15 -1.08 -0.15 0.00 0.02 1.32
## i8->h2 i9->h2 i10->h2 i11->h2 i12->h2 i13->h2 i14->h2 i15->h2
## 0.44 1.43 0.09 0.00 0.00 0.67 2.00 0.00
## i16->h2 i17->h2 i18->h2 i19->h2 i20->h2 i21->h2 i22->h2 i23->h2
## 0.00 0.00 0.00 1.27 1.28 -3.39 0.00 1.16
## i24->h2 i25->h2 i26->h2 i27->h2 i28->h2 i29->h2 i30->h2 i31->h2
## -2.12 0.00 0.00 1.35 0.27 -6.28 0.23 -7.04
## i32->h2 i33->h2 i34->h2 i35->h2 i36->h2 i37->h2 i38->h2 i39->h2
## 4.70 3.54 0.79 6.28 0.15 0.13 -0.03 0.75
## i40->h2 i41->h2 i42->h2 i43->h2 i44->h2 i45->h2 i46->h2 i47->h2
## -2.45 2.25 1.67 1.66 1.69 0.19 -3.31 -0.03
## i48->h2 i49->h2 i50->h2 i51->h2 i52->h2 i53->h2 i54->h2 i55->h2
## -1.15 0.37 0.32 -1.43 -2.62 5.76 4.27 3.83
## i56->h2 i57->h2 i58->h2 i59->h2 i60->h2 i61->h2 i62->h2 i63->h2
## -7.59 -0.99 -0.59 2.80 0.36 0.65 4.14 -1.91
## i64->h2 i65->h2 i66->h2 i67->h2 i68->h2 i69->h2 i70->h2 i71->h2
## -0.11 -0.68 -0.21 -0.14 -0.94 -0.52 0.09 0.81
## i72->h2 i73->h2 i74->h2 i75->h2 i76->h2 i77->h2 i78->h2 i79->h2
## -0.23 0.24 0.00 0.10 -0.80 -0.63 1.09 0.67
## i80->h2 i81->h2 i82->h2 i83->h2 i84->h2 i85->h2 i86->h2 i87->h2
## 0.24 0.51 0.01 0.01 -0.06 0.07 -0.76 -0.28
## i88->h2 i89->h2 i90->h2 i91->h2 i92->h2 i93->h2 i94->h2 i95->h2
## -0.38 0.76 0.53 0.58 0.23 1.24 0.14 -0.20
## i96->h2 i97->h2 i98->h2 i99->h2 i100->h2 i101->h2 i102->h2 i103->h2
## -0.14 -1.64 0.65 -0.10 0.60 0.38 0.27 0.45
## i104->h2 i105->h2 i106->h2 i107->h2 i108->h2
## 0.03 0.29 -1.37 0.14 0.31
## b->h3 i1->h3 i2->h3 i3->h3 i4->h3 i5->h3 i6->h3 i7->h3
## 0.01 0.88 0.00 0.00 0.02 0.00 -0.04 0.03
## i8->h3 i9->h3 i10->h3 i11->h3 i12->h3 i13->h3 i14->h3 i15->h3
## -0.01 0.01 0.00 -0.03 0.00 -0.07 0.00 0.00
## i16->h3 i17->h3 i18->h3 i19->h3 i20->h3 i21->h3 i22->h3 i23->h3
## 0.00 0.00 0.00 0.00 0.00 0.09 0.00 0.00
## i24->h3 i25->h3 i26->h3 i27->h3 i28->h3 i29->h3 i30->h3 i31->h3
## 0.00 0.00 0.00 -0.01 0.65 0.01 0.00 0.06
## i32->h3 i33->h3 i34->h3 i35->h3 i36->h3 i37->h3 i38->h3 i39->h3
## 0.00 -0.05 0.00 0.00 0.00 0.02 0.00 0.00
## i40->h3 i41->h3 i42->h3 i43->h3 i44->h3 i45->h3 i46->h3 i47->h3
## -0.01 0.00 0.04 -0.07 0.00 0.00 0.01 0.00
## i48->h3 i49->h3 i50->h3 i51->h3 i52->h3 i53->h3 i54->h3 i55->h3
## 0.03 0.00 0.00 0.06 0.02 0.00 0.00 -0.06
## i56->h3 i57->h3 i58->h3 i59->h3 i60->h3 i61->h3 i62->h3 i63->h3
## 0.00 0.00 0.00 0.06 0.00 -0.05 0.02 0.00
## i64->h3 i65->h3 i66->h3 i67->h3 i68->h3 i69->h3 i70->h3 i71->h3
## 0.84 0.00 2.06 0.04 0.00 0.00 0.00 0.00
## i72->h3 i73->h3 i74->h3 i75->h3 i76->h3 i77->h3 i78->h3 i79->h3
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i80->h3 i81->h3 i82->h3 i83->h3 i84->h3 i85->h3 i86->h3 i87->h3
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i88->h3 i89->h3 i90->h3 i91->h3 i92->h3 i93->h3 i94->h3 i95->h3
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i96->h3 i97->h3 i98->h3 i99->h3 i100->h3 i101->h3 i102->h3 i103->h3
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i104->h3 i105->h3 i106->h3 i107->h3 i108->h3
## 0.00 0.00 -0.03 0.00 0.00
## b->o h1->o h2->o h3->o
## -0.23 1.01 -3.73 1.05
# Variable-importance plot for the Node2 fit, showing up to 50 predictors.
vip(Adult_TDA_KDE_5.60.5_n2_NN1Fit0, num_features = 50) +
  ggtitle("Adult_TDA_KDE_5.60.5_n2_NN1Fit TDA-Assisted NN")

# Predict the class of every row in the held-out test data with the Node2 fit.
pred0 <- predict(
  Adult_TDA_KDE_5.60.5_n2_NN1Fit0,
  newdata = adult.one_hot_df4Test
)
# Confusion matrix of the predictions against the observed test labels.
ad_tda_kde_5.60.5_n2_nn1_cf0 <- confusionMatrix(
  data = pred0,
  reference = as.factor(adult.one_hot_df4Test$adult_df1)
)
ad_tda_kde_5.60.5_n2_nn1_cf0
## Confusion Matrix and Statistics
##
## Reference
## Prediction <=50K >50K
## <=50K 7143 1678
## >50K 273 674
##
## Accuracy : 0.8003
## 95% CI : (0.7922, 0.8082)
## No Information Rate : 0.7592
## P-Value [Acc > NIR] : < 2.2e-16
##
## Kappa : 0.3137
##
## Mcnemar's Test P-Value : < 2.2e-16
##
## Sensitivity : 0.9632
## Specificity : 0.2866
## Pos Pred Value : 0.8098
## Neg Pred Value : 0.7117
## Prevalence : 0.7592
## Detection Rate : 0.7313
## Detection Prevalence : 0.9031
## Balanced Accuracy : 0.6249
##
## 'Positive' Class : <=50K
##
# Prints the Node2 confusion matrix a second time; the same object was already
# printed right after it was created.
ad_tda_kde_5.60.5_n2_nn1_cf0
## Confusion Matrix and Statistics
##
## Reference
## Prediction <=50K >50K
## <=50K 7143 1678
## >50K 273 674
##
## Accuracy : 0.8003
## 95% CI : (0.7922, 0.8082)
## No Information Rate : 0.7592
## P-Value [Acc > NIR] : < 2.2e-16
##
## Kappa : 0.3137
##
## Mcnemar's Test P-Value : < 2.2e-16
##
## Sensitivity : 0.9632
## Specificity : 0.2866
## Pos Pred Value : 0.8098
## Neg Pred Value : 0.7117
## Prevalence : 0.7592
## Detection Rate : 0.7313
## Detection Prevalence : 0.9031
## Balanced Accuracy : 0.6249
##
## 'Positive' Class : <=50K
##
# Overall test-set statistics as a named numeric vector: Accuracy, Kappa,
# accuracy interval bounds, null accuracy and the two p-values.
ad_tda_kde_5.60.5_n2_nn1_cf0$overall
## Accuracy Kappa AccuracyLower AccuracyUpper AccuracyNull
## 8.002662e-01 3.137412e-01 7.921975e-01 8.081554e-01 7.592138e-01
## AccuracyPValue McnemarPValue
## 1.888881e-22 1.004573e-221
# Store the overall test-set Accuracy (first element of $overall, name kept).
ad_tda_kde_5.60.5_n2_nn1_cf0_ov_acc <-
  ad_tda_kde_5.60.5_n2_nn1_cf0$overall["Accuracy"]
# Per-class statistics (sensitivity, specificity, precision, recall, F1, ...).
ad_tda_kde_5.60.5_n2_nn1_cf0[["byClass"]]
## Sensitivity Specificity Pos Pred Value
## 0.9631877 0.2865646 0.8097721
## Neg Pred Value Precision Recall
## 0.7117212 0.8097721 0.9631877
## F1 Prevalence Detection Rate
## 0.8798423 0.7592138 0.7312654
## Detection Prevalence Balanced Accuracy
## 0.9030508 0.6248762
# Keep Precision, Recall and F1 (elements 5 to 7 of $byClass) for later use.
ad_tda_kde_5.60.5_n2_nn1_cf0_pre_rec_f1 <-
  ad_tda_kde_5.60.5_n2_nn1_cf0$byClass[c("Precision", "Recall", "F1")]
###### Conduct initial Bayesian tests of non-tda-assisted NN vs. tda-assisted NN classifiers
### 3-fold difference
# Fold-wise accuracy of the comparison model (presumably the baseline NN1 --
# confirm where ad_nn1_fit_re is defined) minus that of the Node2 fit.
# NOTE(review): rows are subtracted by position; confirm both objects list the
# folds in the same order.
diff_tda_kde_5.60.5_nn1_n2_3_fold <-
  ad_nn1_fit_re - ad_tda_kde_5.60.5_n2_nn1_fit_re
diff_tda_kde_5.60.5_nn1_n2_3_fold
## Accuracy
## 1 -0.01769400
## 2 0.02241235
## 3 -0.02017884
## Bayesian tests on the 3-fold differences
# Bayesian sign test. -0.01 and 0.01 are presumably the ROPE limits
# (TODO confirm against the BayesianSignTest definition).
# The result is a list with probLeft, probRope and probRight.
bst_tda_kde_5.60.5_nn1.n2_3_fold <- BayesianSignTest(
  as.matrix(diff_tda_kde_5.60.5_nn1_n2_3_fold),
  -0.01,
  0.01
)
bst_tda_kde_5.60.5_nn1.n2_3_fold
## $probLeft
## [1] 0.5
##
## $probRope
## [1] 0.25
##
## $probRight
## [1] 0.25
# Odds of "left" versus "right" from the Bayesian sign test
# (probLeft divided by probRight).
bst_tda_kde_5.60.5_nn1.n2_3_fold_odds.left <- with(
  bst_tda_kde_5.60.5_nn1.n2_3_fold,
  probLeft / probRight
)
bst_tda_kde_5.60.5_nn1.n2_3_fold_odds.left
## [1] 2
# Bayesian signed-rank test on the 3-fold differences, using the same
# -0.01 / 0.01 limits as the sign test.
# The result is a list with winLeft, winRope and winRight.
bsr_tda_kde_5.60.5_nn1.n2_3_fold <- BayesianSignedRank(
  as.matrix(diff_tda_kde_5.60.5_nn1_n2_3_fold),
  -0.01,
  0.01
)
bsr_tda_kde_5.60.5_nn1.n2_3_fold
## $winLeft
## [1] 0.4894667
##
## $winRope
## [1] 0.3753667
##
## $winRight
## [1] 0.1351667
# Bayesian correlated t-test on the 3-fold differences.
# 0.1 is presumably the assumed correlation and -0.01 / 0.01 the ROPE limits
# (TODO confirm against the correlatedBayesianTtest definition).
# The result is a list with left, rope and right.
bct_tda_kde_5.60.5_nn1.n2_3_fold <- correlatedBayesianTtest(
  as.matrix(diff_tda_kde_5.60.5_nn1_n2_3_fold),
  0.1,
  -0.01,
  0.01
)
bct_tda_kde_5.60.5_nn1.n2_3_fold
## $left
## [1] 0.3948839
##
## $rope
## [1] 0.3840987
##
## $right
## [1] 0.2210175
# ROPE plot for the Node2 3-fold accuracy differences, interval [-0.01, 0.01]
diff_tda_kde_5.60.5_nn1_n2_3_fold |>
  rope(c(-0.01, 0.01)) |>
  plot()

# Bayes factor (currently disabled)
#bf_tda_kde_5.60.5_nn1.n2_3_fold = ttestBF(x = as.matrix(diff_tda_kde_5.60.5_nn1_n2_3_fold))
#bf_tda_kde_5.60.5_nn1.n2_3_fold
# Classical one-sample t-test of the fold-wise differences against a mean of 0
diff_tda_kde_5.60.5_nn1_n2_3_fold |>
  as.matrix() |>
  t.test()
##
## One Sample t-test
##
## data: as.matrix(diff_tda_kde_5.60.5_nn1_n2_3_fold)
## t = -0.3734, df = 2, p-value = 0.7447
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
## -0.06453689 0.05422989
## sample estimates:
## mean of x
## -0.005153499
### Test-set difference
# Comparison model's test-set accuracy (presumably the baseline NN1 -- confirm
# where nn1_cf_ov_acc is defined) minus the Node2 fit's overall test-set
# accuracy. The result is a single named number.
diff_tda_kde_5.60.5_nn1.n2_test <-
  nn1_cf_ov_acc - ad_tda_kde_5.60.5_n2_nn1_cf0_ov_acc
diff_tda_kde_5.60.5_nn1.n2_test
## Accuracy
## 0.00481163
## Bayesian tests on the test-set difference
# Bayesian sign test. -0.01 and 0.01 are presumably the ROPE limits
# (TODO confirm against the BayesianSignTest definition).
# The result is a list with probLeft, probRope and probRight.
bst_tda_kde_5.60.5_nn1.n2_test <- BayesianSignTest(
  as.matrix(diff_tda_kde_5.60.5_nn1.n2_test),
  -0.01,
  0.01
)
bst_tda_kde_5.60.5_nn1.n2_test
## $probLeft
## [1] 0
##
## $probRope
## [1] 1
##
## $probRight
## [1] 0
# Odds of "left" versus "right" from the Bayesian sign test.
# Both probabilities are 0 for this single test-set difference, so the ratio
# evaluates to NaN (0 / 0).
bst_tda_kde_5.60.5_nn1.n2_test_odds.left <- with(
  bst_tda_kde_5.60.5_nn1.n2_test,
  probLeft / probRight
)
bst_tda_kde_5.60.5_nn1.n2_test_odds.left
## [1] NaN
# Bayesian signed-rank test on the test-set difference, using the same
# -0.01 / 0.01 limits as the sign test.
# The result is a list with winLeft, winRope and winRight.
bsr_tda_kde_5.60.5_nn1.n2_test <- BayesianSignedRank(
  as.matrix(diff_tda_kde_5.60.5_nn1.n2_test),
  -0.01,
  0.01
)
bsr_tda_kde_5.60.5_nn1.n2_test
## $winLeft
## [1] 0
##
## $winRope
## [1] 1
##
## $winRight
## [1] 0
# Bayesian correlated t-test on the test-set difference.
# 0.1 is presumably the assumed correlation and -0.01 / 0.01 the ROPE limits
# (TODO confirm against the correlatedBayesianTtest definition).
# With only one difference supplied, left/rope/right all print as NA.
bct_tda_kde_5.60.5_nn1.n2_test <- correlatedBayesianTtest(
  as.matrix(diff_tda_kde_5.60.5_nn1.n2_test),
  0.1,
  -0.01,
  0.01
)
bct_tda_kde_5.60.5_nn1.n2_test
## $left
## [1] NA
##
## $rope
## [1] NA
##
## $right
## [1] NA
# Rope Plot
#plot(rope(diff_tda_kde_5.60.5_nn1.n2_test))
#BayesFactor
#bf_tda_kde_5.60.5_nn1.n2_test = ttestBF(x = as.matrix(diff_tda_kde_5.60.5_nn1.n2_test))
#bf_tda_kde_5.60.5_nn1.n2_test
#t_test
#t.test(as.matrix(diff_tda_kde_5.60.5_nn1.n2_test))
## Node3
# Neural Network 1
# Tune an nnet model through caret::train on the `.n3.vec` data set over the
# nn1Grid candidates, using the resampling scheme in fitControl; the candidate
# with the highest Accuracy is selected.
# NOTE(review): `Importance` is forwarded through train()'s `...`; it does not
# look like an nnet() argument -- TODO confirm it has any effect.
Adult_TDA_KDE_5.60.5_n3_NN1Fit0 <- train(
  as.factor(adult_df1) ~ .,
  data = tda.m_kde_adult_5.60.5.n3.vec,
  Importance = TRUE,
  method = "nnet",
  trControl = fitControl,
  tuneGrid = nn1Grid,
  metric = "Accuracy"
)
## # weights: 221
## initial value 5173.766904
## iter 10 value 5158.816215
## iter 20 value 4859.222698
## iter 30 value 4818.377281
## iter 40 value 4755.842411
## iter 50 value 4713.355156
## iter 60 value 4698.288805
## iter 70 value 4674.146151
## iter 80 value 4637.643065
## iter 90 value 4623.070153
## iter 100 value 4615.723013
## final value 4615.723013
## stopped after 100 iterations
## # weights: 331
## initial value 6473.410458
## iter 10 value 5081.157282
## iter 20 value 4846.996978
## iter 30 value 4835.899220
## iter 40 value 4820.906722
## iter 50 value 4735.442308
## iter 60 value 4440.604673
## iter 70 value 4019.504968
## iter 80 value 3699.328476
## iter 90 value 3340.701701
## iter 100 value 3132.626094
## final value 3132.626094
## stopped after 100 iterations
## # weights: 551
## initial value 6017.875276
## iter 10 value 5160.548788
## iter 20 value 5110.636949
## iter 30 value 4869.342808
## iter 40 value 4743.117242
## iter 50 value 4696.106404
## iter 60 value 4668.513683
## iter 70 value 4666.026188
## iter 80 value 4656.849161
## iter 90 value 4655.154700
## iter 100 value 4648.408940
## final value 4648.408940
## stopped after 100 iterations
## # weights: 771
## initial value 5606.139581
## iter 10 value 4850.698808
## iter 20 value 4819.807601
## iter 30 value 4815.602466
## iter 40 value 4814.749279
## iter 50 value 4814.372622
## iter 60 value 4812.555697
## iter 70 value 4788.451961
## iter 80 value 4724.402936
## iter 90 value 4716.811297
## iter 100 value 4709.406579
## final value 4709.406579
## stopped after 100 iterations
## # weights: 221
## initial value 6525.535799
## iter 10 value 5107.919528
## iter 20 value 4811.726587
## iter 30 value 4748.623192
## iter 40 value 4738.762459
## iter 50 value 4721.771950
## iter 60 value 4713.150235
## iter 70 value 4712.956961
## iter 80 value 4712.369356
## iter 90 value 4708.805408
## iter 100 value 4707.579416
## final value 4707.579416
## stopped after 100 iterations
## # weights: 331
## initial value 5303.280547
## iter 10 value 4864.669602
## iter 20 value 4831.076164
## iter 30 value 4827.866474
## iter 40 value 4813.222292
## iter 50 value 4742.786790
## iter 60 value 4727.052257
## iter 70 value 4715.456030
## iter 80 value 4664.752186
## iter 90 value 4650.043181
## iter 100 value 4647.433538
## final value 4647.433538
## stopped after 100 iterations
## # weights: 551
## initial value 8174.301694
## iter 10 value 5130.592507
## iter 20 value 4840.920013
## iter 30 value 4818.764225
## iter 40 value 4693.733923
## iter 50 value 4629.930675
## iter 60 value 4423.433974
## iter 70 value 3898.091451
## iter 80 value 3427.972892
## iter 90 value 3297.886764
## iter 100 value 3150.120483
## final value 3150.120483
## stopped after 100 iterations
## # weights: 771
## initial value 8390.437739
## iter 10 value 5109.960956
## iter 20 value 4813.301832
## iter 30 value 4665.327913
## iter 40 value 4650.584771
## iter 50 value 4571.520044
## iter 60 value 4205.611360
## iter 70 value 3624.966054
## iter 80 value 3462.497793
## iter 90 value 3442.302391
## iter 100 value 3259.590462
## final value 3259.590462
## stopped after 100 iterations
## # weights: 221
## initial value 5346.783662
## iter 10 value 5129.428945
## iter 20 value 4922.853497
## iter 30 value 4808.906661
## iter 40 value 4706.918042
## iter 50 value 4631.363376
## iter 60 value 4490.585549
## iter 70 value 4317.787384
## iter 80 value 4098.698692
## iter 90 value 3675.588540
## iter 100 value 3461.825948
## final value 3461.825948
## stopped after 100 iterations
## # weights: 331
## initial value 10194.797571
## iter 10 value 5119.705490
## iter 20 value 5115.485470
## iter 30 value 4835.996230
## iter 40 value 4817.261612
## iter 50 value 4816.276311
## iter 60 value 4810.130933
## iter 70 value 4755.048082
## iter 80 value 4732.863782
## iter 90 value 4259.012968
## iter 100 value 3895.216061
## final value 3895.216061
## stopped after 100 iterations
## # weights: 551
## initial value 5343.417062
## iter 10 value 5160.728366
## iter 20 value 5160.603501
## iter 30 value 4842.008824
## iter 40 value 4829.558196
## iter 50 value 4794.498955
## iter 60 value 4759.063462
## iter 70 value 4620.066940
## iter 80 value 4481.737409
## iter 90 value 4377.218047
## iter 100 value 3614.297230
## final value 3614.297230
## stopped after 100 iterations
## # weights: 771
## initial value 8211.602652
## iter 10 value 5066.000314
## iter 20 value 4864.590160
## iter 30 value 4776.572503
## iter 40 value 4603.789840
## iter 50 value 3866.000062
## iter 60 value 3781.735694
## iter 70 value 3740.361191
## iter 80 value 3486.621202
## iter 90 value 3362.647843
## iter 100 value 3331.346553
## final value 3331.346553
## stopped after 100 iterations
## # weights: 221
## initial value 6069.508516
## iter 10 value 5103.854467
## iter 20 value 4800.133279
## iter 30 value 4795.197616
## iter 40 value 4773.635268
## iter 50 value 4716.299034
## iter 60 value 4647.857081
## iter 70 value 4392.526438
## iter 80 value 4055.565440
## iter 90 value 3430.894220
## iter 100 value 3332.590956
## final value 3332.590956
## stopped after 100 iterations
## # weights: 331
## initial value 5389.296505
## iter 10 value 5160.129962
## iter 20 value 4840.753408
## iter 30 value 4800.483957
## iter 40 value 4700.853170
## iter 50 value 4680.255132
## iter 60 value 4671.433701
## iter 70 value 4653.474790
## iter 80 value 4639.389487
## iter 90 value 4622.226738
## iter 100 value 4595.027535
## final value 4595.027535
## stopped after 100 iterations
## # weights: 551
## initial value 5217.939419
## iter 10 value 5160.557336
## iter 20 value 5110.426602
## iter 30 value 4825.214856
## iter 40 value 4797.591742
## iter 50 value 4739.774930
## iter 60 value 4640.124585
## iter 70 value 4541.871376
## iter 80 value 4170.998239
## iter 90 value 3591.545812
## iter 100 value 3237.889131
## final value 3237.889131
## stopped after 100 iterations
## # weights: 771
## initial value 5305.678551
## iter 10 value 5133.265681
## iter 20 value 5081.869232
## iter 30 value 4938.044090
## iter 40 value 4740.202423
## iter 50 value 3964.483394
## iter 60 value 3631.225591
## iter 70 value 3499.379503
## iter 80 value 3221.991772
## iter 90 value 3125.198982
## iter 100 value 3115.078415
## final value 3115.078415
## stopped after 100 iterations
## # weights: 221
## initial value 5460.833002
## iter 10 value 5106.629361
## iter 20 value 4779.260753
## iter 30 value 4774.494329
## iter 40 value 4762.975597
## iter 50 value 4751.408937
## iter 60 value 4737.934387
## iter 70 value 4720.876773
## iter 80 value 4706.735032
## iter 90 value 4647.366537
## iter 100 value 4572.360635
## final value 4572.360635
## stopped after 100 iterations
## # weights: 331
## initial value 5862.998534
## iter 10 value 4876.519444
## iter 20 value 4785.198638
## iter 30 value 4747.968480
## iter 40 value 4513.503940
## iter 50 value 4261.186308
## iter 60 value 3624.435952
## iter 70 value 3311.345206
## iter 80 value 3183.757727
## iter 90 value 3090.652376
## iter 100 value 3031.725465
## final value 3031.725465
## stopped after 100 iterations
## # weights: 551
## initial value 5609.985654
## iter 10 value 5081.825345
## iter 20 value 4804.065631
## iter 30 value 4734.267018
## iter 40 value 4720.585339
## iter 50 value 4699.238611
## iter 60 value 4574.624416
## iter 70 value 3970.248271
## iter 80 value 3424.890035
## iter 90 value 3253.875625
## iter 100 value 3140.496370
## final value 3140.496370
## stopped after 100 iterations
## # weights: 771
## initial value 5313.639793
## iter 10 value 5163.112279
## iter 20 value 5160.601242
## iter 30 value 5160.572249
## iter 40 value 5027.889007
## iter 50 value 4835.199193
## iter 60 value 4818.608820
## iter 70 value 4800.718813
## iter 80 value 4725.594982
## iter 90 value 4465.313560
## iter 100 value 4086.070467
## final value 4086.070467
## stopped after 100 iterations
## # weights: 221
## initial value 6611.175021
## iter 10 value 5162.183725
## iter 20 value 4823.917093
## iter 30 value 4823.236463
## iter 40 value 4799.723621
## iter 50 value 4791.555781
## iter 60 value 4783.909180
## iter 70 value 4770.033642
## iter 80 value 4747.289167
## iter 90 value 4728.203259
## iter 100 value 4718.829418
## final value 4718.829418
## stopped after 100 iterations
## # weights: 331
## initial value 5316.454484
## iter 10 value 4990.990209
## iter 20 value 4921.804940
## iter 30 value 4884.544599
## iter 40 value 4778.772267
## iter 50 value 4729.438475
## iter 60 value 4705.790516
## iter 70 value 4691.339845
## iter 80 value 4624.034337
## iter 90 value 4606.822283
## iter 100 value 4589.307660
## final value 4589.307660
## stopped after 100 iterations
## # weights: 551
## initial value 6719.038565
## iter 10 value 5156.786373
## iter 20 value 5031.272551
## iter 30 value 4731.992808
## iter 40 value 4704.381066
## iter 50 value 4679.064253
## iter 60 value 4604.304619
## iter 70 value 4519.123838
## iter 80 value 4361.836924
## iter 90 value 4338.105353
## iter 100 value 4038.390421
## final value 4038.390421
## stopped after 100 iterations
## # weights: 771
## initial value 8126.441918
## iter 10 value 5006.578895
## iter 20 value 4816.440157
## iter 30 value 4756.577511
## iter 40 value 4716.501376
## iter 50 value 4702.462131
## iter 60 value 4698.065875
## iter 70 value 4693.123402
## iter 80 value 4692.076827
## iter 90 value 4670.177575
## iter 100 value 4635.668197
## final value 4635.668197
## stopped after 100 iterations
## # weights: 221
## initial value 9610.521905
## iter 10 value 5130.617025
## iter 20 value 4768.834286
## iter 30 value 4768.658673
## iter 30 value 4768.658642
## iter 40 value 4708.963354
## iter 50 value 4678.762612
## iter 60 value 4672.183220
## iter 70 value 4668.924015
## iter 80 value 4668.805343
## iter 90 value 4656.489511
## iter 100 value 4643.771305
## final value 4643.771305
## stopped after 100 iterations
## # weights: 331
## initial value 7004.895894
## iter 10 value 4832.664372
## iter 20 value 4752.800882
## iter 30 value 4687.890904
## iter 40 value 4656.793432
## iter 50 value 4616.967166
## iter 60 value 4494.977650
## iter 70 value 3827.926500
## iter 80 value 3655.235798
## iter 90 value 3637.388920
## iter 100 value 3632.590039
## final value 3632.590039
## stopped after 100 iterations
## # weights: 551
## initial value 6996.454423
## iter 10 value 5073.795339
## iter 20 value 4763.964230
## iter 30 value 4631.110190
## iter 40 value 4615.561688
## iter 50 value 4588.670505
## iter 60 value 4570.612553
## iter 70 value 4496.418226
## iter 80 value 4326.353217
## iter 90 value 4144.571180
## iter 100 value 3951.657097
## final value 3951.657097
## stopped after 100 iterations
## # weights: 771
## initial value 6088.615002
## iter 10 value 4987.773619
## iter 20 value 4787.218937
## iter 30 value 4140.693195
## iter 40 value 3855.462005
## iter 50 value 3518.562774
## iter 60 value 3208.522152
## iter 70 value 3074.905171
## iter 80 value 2993.458940
## iter 90 value 2947.431500
## iter 100 value 2925.228368
## final value 2925.228368
## stopped after 100 iterations
## # weights: 221
## initial value 5405.147434
## iter 10 value 5131.962214
## iter 20 value 4876.019258
## iter 30 value 4741.806599
## iter 40 value 4677.683499
## iter 50 value 4669.867773
## iter 60 value 4665.654576
## iter 70 value 4655.163411
## iter 80 value 4585.818421
## iter 90 value 4567.285966
## iter 100 value 4557.267021
## final value 4557.267021
## stopped after 100 iterations
## # weights: 331
## initial value 8491.471015
## iter 10 value 4962.411652
## iter 20 value 4770.790608
## iter 30 value 4764.188464
## iter 40 value 4760.904696
## iter 50 value 4733.697886
## iter 60 value 4685.381204
## iter 70 value 4499.232181
## iter 80 value 4441.459263
## iter 90 value 4437.750314
## iter 100 value 4432.863298
## final value 4432.863298
## stopped after 100 iterations
## # weights: 551
## initial value 10081.256486
## iter 10 value 4833.145093
## iter 20 value 4794.508197
## iter 30 value 4781.055085
## iter 40 value 4771.838805
## iter 50 value 4731.610235
## iter 60 value 4691.035675
## iter 70 value 4659.613111
## iter 80 value 4652.504580
## iter 90 value 4645.409961
## iter 100 value 4623.792382
## final value 4623.792382
## stopped after 100 iterations
## # weights: 771
## initial value 5416.411293
## iter 10 value 4934.159339
## iter 20 value 4858.604745
## iter 30 value 4753.756822
## iter 40 value 4730.596020
## iter 50 value 4722.447397
## iter 60 value 4714.466157
## iter 70 value 4679.937929
## iter 80 value 4652.322341
## iter 90 value 4646.412438
## iter 100 value 4572.067554
## final value 4572.067554
## stopped after 100 iterations
## # weights: 221
## initial value 9074.084029
## iter 10 value 5105.426107
## iter 20 value 5104.671561
## iter 30 value 5104.609901
## final value 5104.609630
## converged
## # weights: 331
## initial value 5920.996942
## iter 10 value 5165.770914
## iter 20 value 5160.863691
## iter 30 value 5160.807081
## iter 40 value 4875.015002
## iter 50 value 4861.057157
## iter 60 value 4837.850373
## iter 70 value 4731.210855
## iter 80 value 4677.285698
## iter 90 value 4476.576051
## iter 100 value 4281.581250
## final value 4281.581250
## stopped after 100 iterations
## # weights: 551
## initial value 5297.434190
## iter 10 value 5135.952721
## iter 20 value 4756.848384
## iter 30 value 4725.756364
## iter 40 value 4685.750972
## iter 50 value 4683.707007
## iter 60 value 4649.891185
## iter 70 value 4635.739719
## iter 80 value 4502.762235
## iter 90 value 4254.356674
## iter 100 value 3698.730849
## final value 3698.730849
## stopped after 100 iterations
## # weights: 771
## initial value 5377.433299
## iter 10 value 4790.247837
## iter 20 value 4680.294858
## iter 30 value 4653.277620
## iter 40 value 4621.873948
## iter 50 value 4555.805352
## iter 60 value 3988.863883
## iter 70 value 3626.102759
## iter 80 value 3258.933049
## iter 90 value 3109.087623
## iter 100 value 3022.364999
## final value 3022.364999
## stopped after 100 iterations
## # weights: 771
## initial value 11384.274297
## iter 10 value 7675.831343
## iter 20 value 7249.699722
## iter 30 value 7212.071632
## iter 40 value 7026.256588
## iter 50 value 6998.345823
## iter 60 value 6963.350936
## iter 70 value 6955.592972
## iter 80 value 6951.480647
## iter 90 value 6942.576323
## iter 100 value 6936.759958
## final value 6936.759958
## stopped after 100 iterations
# Show the caret tuning summary for the node-3 TDA/KDE-assisted network
print(Adult_TDA_KDE_5.60.5_n3_NN1Fit0)
## Neural Network
##
## 13266 samples
## 108 predictor
## 2 classes: ' <=50K', ' >50K'
##
## No pre-processing
## Resampling: Cross-Validated (3 fold)
## Summary of sample sizes: 8844, 8844, 8844
## Resampling results across tuning parameters:
##
## size decay Accuracy Kappa
## 2 0.3 0.7881049 0.3750234
## 2 0.5 0.7777024 0.2872715
## 2 0.7 0.7822252 0.2715509
## 3 0.3 0.8089100 0.4550476
## 3 0.5 0.7921755 0.3931054
## 3 0.7 0.7727273 0.2334769
## 5 0.3 0.7850143 0.3307373
## 5 0.5 0.8086839 0.4462745
## 5 0.7 0.7942861 0.3960732
## 7 0.3 0.8126036 0.4821764
## 7 0.5 0.7861450 0.3436784
## 7 0.7 0.8107191 0.4633917
##
## Accuracy was used to select the optimal model using the largest value.
## The final values used for the model were size = 7 and decay = 0.3.
# Per-fold Accuracy and Kappa stored in the fit's `resample` element
print(Adult_TDA_KDE_5.60.5_n3_NN1Fit0$resample)
## Accuracy Kappa Resample
## 1 0.8290366 0.5712893 Fold2
## 2 0.7761194 0.3017683 Fold1
## 3 0.8326549 0.5734715 Fold3
# Keep the per-fold Accuracy column (a one-column data frame); it is the
# first column of `resample`, so selecting it by name is equivalent to `[1]`.
ad_tda_kde_5.60.5_n3_nn1_fit_re <-
  Adult_TDA_KDE_5.60.5_n3_NN1Fit0$resample["Accuracy"]
# Print the architecture and weights of the fitted network
print(summary(Adult_TDA_KDE_5.60.5_n3_NN1Fit0))
## a 108-7-1 network with 771 weights
## options were - entropy fitting decay=0.3
## b->h1 i1->h1 i2->h1 i3->h1 i4->h1 i5->h1 i6->h1 i7->h1
## 0.00 -0.04 0.00 0.00 0.00 0.00 -0.01 0.01
## i8->h1 i9->h1 i10->h1 i11->h1 i12->h1 i13->h1 i14->h1 i15->h1
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i16->h1 i17->h1 i18->h1 i19->h1 i20->h1 i21->h1 i22->h1 i23->h1
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i24->h1 i25->h1 i26->h1 i27->h1 i28->h1 i29->h1 i30->h1 i31->h1
## 0.01 0.00 0.00 0.00 0.02 0.00 0.00 0.00
## i32->h1 i33->h1 i34->h1 i35->h1 i36->h1 i37->h1 i38->h1 i39->h1
## 0.00 0.00 0.01 0.00 0.00 0.00 0.00 0.00
## i40->h1 i41->h1 i42->h1 i43->h1 i44->h1 i45->h1 i46->h1 i47->h1
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i48->h1 i49->h1 i50->h1 i51->h1 i52->h1 i53->h1 i54->h1 i55->h1
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i56->h1 i57->h1 i58->h1 i59->h1 i60->h1 i61->h1 i62->h1 i63->h1
## 0.00 0.00 0.00 -0.01 0.00 0.00 0.00 0.00
## i64->h1 i65->h1 i66->h1 i67->h1 i68->h1 i69->h1 i70->h1 i71->h1
## -0.18 0.00 0.05 0.00 0.00 0.00 0.00 0.00
## i72->h1 i73->h1 i74->h1 i75->h1 i76->h1 i77->h1 i78->h1 i79->h1
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i80->h1 i81->h1 i82->h1 i83->h1 i84->h1 i85->h1 i86->h1 i87->h1
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i88->h1 i89->h1 i90->h1 i91->h1 i92->h1 i93->h1 i94->h1 i95->h1
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i96->h1 i97->h1 i98->h1 i99->h1 i100->h1 i101->h1 i102->h1 i103->h1
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i104->h1 i105->h1 i106->h1 i107->h1 i108->h1
## 0.00 0.00 0.00 0.00 0.00
## b->h2 i1->h2 i2->h2 i3->h2 i4->h2 i5->h2 i6->h2 i7->h2
## -0.29 -0.10 0.00 0.09 0.16 0.00 -0.62 0.09
## i8->h2 i9->h2 i10->h2 i11->h2 i12->h2 i13->h2 i14->h2 i15->h2
## 0.12 -0.11 -0.02 0.00 0.00 0.00 0.00 0.00
## i16->h2 i17->h2 i18->h2 i19->h2 i20->h2 i21->h2 i22->h2 i23->h2
## 0.00 0.00 0.00 0.02 -0.18 0.43 0.00 -0.32
## i24->h2 i25->h2 i26->h2 i27->h2 i28->h2 i29->h2 i30->h2 i31->h2
## -0.15 0.00 0.00 -0.08 -2.02 -0.65 0.00 0.64
## i32->h2 i33->h2 i34->h2 i35->h2 i36->h2 i37->h2 i38->h2 i39->h2
## -0.01 -0.39 0.12 0.00 0.00 -0.11 0.00 -0.55
## i40->h2 i41->h2 i42->h2 i43->h2 i44->h2 i45->h2 i46->h2 i47->h2
## 0.37 0.00 -0.15 0.04 0.02 0.00 -0.36 0.00
## i48->h2 i49->h2 i50->h2 i51->h2 i52->h2 i53->h2 i54->h2 i55->h2
## 0.43 0.01 0.01 0.82 -0.80 0.00 0.00 -0.13
## i56->h2 i57->h2 i58->h2 i59->h2 i60->h2 i61->h2 i62->h2 i63->h2
## -0.17 0.00 0.00 -0.14 0.00 -0.15 -0.34 0.05
## i64->h2 i65->h2 i66->h2 i67->h2 i68->h2 i69->h2 i70->h2 i71->h2
## 0.00 0.08 -0.17 0.12 0.00 0.14 0.00 0.00
## i72->h2 i73->h2 i74->h2 i75->h2 i76->h2 i77->h2 i78->h2 i79->h2
## 0.00 0.00 0.00 0.00 0.00 0.00 -0.12 0.00
## i80->h2 i81->h2 i82->h2 i83->h2 i84->h2 i85->h2 i86->h2 i87->h2
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i88->h2 i89->h2 i90->h2 i91->h2 i92->h2 i93->h2 i94->h2 i95->h2
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i96->h2 i97->h2 i98->h2 i99->h2 i100->h2 i101->h2 i102->h2 i103->h2
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i104->h2 i105->h2 i106->h2 i107->h2 i108->h2
## 0.00 0.00 -0.42 0.00 0.00
## b->h3 i1->h3 i2->h3 i3->h3 i4->h3 i5->h3 i6->h3 i7->h3
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i8->h3 i9->h3 i10->h3 i11->h3 i12->h3 i13->h3 i14->h3 i15->h3
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i16->h3 i17->h3 i18->h3 i19->h3 i20->h3 i21->h3 i22->h3 i23->h3
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i24->h3 i25->h3 i26->h3 i27->h3 i28->h3 i29->h3 i30->h3 i31->h3
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i32->h3 i33->h3 i34->h3 i35->h3 i36->h3 i37->h3 i38->h3 i39->h3
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i40->h3 i41->h3 i42->h3 i43->h3 i44->h3 i45->h3 i46->h3 i47->h3
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i48->h3 i49->h3 i50->h3 i51->h3 i52->h3 i53->h3 i54->h3 i55->h3
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i56->h3 i57->h3 i58->h3 i59->h3 i60->h3 i61->h3 i62->h3 i63->h3
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i64->h3 i65->h3 i66->h3 i67->h3 i68->h3 i69->h3 i70->h3 i71->h3
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i72->h3 i73->h3 i74->h3 i75->h3 i76->h3 i77->h3 i78->h3 i79->h3
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i80->h3 i81->h3 i82->h3 i83->h3 i84->h3 i85->h3 i86->h3 i87->h3
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i88->h3 i89->h3 i90->h3 i91->h3 i92->h3 i93->h3 i94->h3 i95->h3
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i96->h3 i97->h3 i98->h3 i99->h3 i100->h3 i101->h3 i102->h3 i103->h3
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i104->h3 i105->h3 i106->h3 i107->h3 i108->h3
## 0.00 0.00 0.00 0.00 0.00
## b->h4 i1->h4 i2->h4 i3->h4 i4->h4 i5->h4 i6->h4 i7->h4
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i8->h4 i9->h4 i10->h4 i11->h4 i12->h4 i13->h4 i14->h4 i15->h4
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i16->h4 i17->h4 i18->h4 i19->h4 i20->h4 i21->h4 i22->h4 i23->h4
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i24->h4 i25->h4 i26->h4 i27->h4 i28->h4 i29->h4 i30->h4 i31->h4
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i32->h4 i33->h4 i34->h4 i35->h4 i36->h4 i37->h4 i38->h4 i39->h4
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i40->h4 i41->h4 i42->h4 i43->h4 i44->h4 i45->h4 i46->h4 i47->h4
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i48->h4 i49->h4 i50->h4 i51->h4 i52->h4 i53->h4 i54->h4 i55->h4
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i56->h4 i57->h4 i58->h4 i59->h4 i60->h4 i61->h4 i62->h4 i63->h4
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i64->h4 i65->h4 i66->h4 i67->h4 i68->h4 i69->h4 i70->h4 i71->h4
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i72->h4 i73->h4 i74->h4 i75->h4 i76->h4 i77->h4 i78->h4 i79->h4
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i80->h4 i81->h4 i82->h4 i83->h4 i84->h4 i85->h4 i86->h4 i87->h4
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i88->h4 i89->h4 i90->h4 i91->h4 i92->h4 i93->h4 i94->h4 i95->h4
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i96->h4 i97->h4 i98->h4 i99->h4 i100->h4 i101->h4 i102->h4 i103->h4
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i104->h4 i105->h4 i106->h4 i107->h4 i108->h4
## 0.00 0.00 0.00 0.00 0.00
## b->h5 i1->h5 i2->h5 i3->h5 i4->h5 i5->h5 i6->h5 i7->h5
## 0.00 0.10 0.00 0.00 0.00 0.00 0.01 -0.01
## i8->h5 i9->h5 i10->h5 i11->h5 i12->h5 i13->h5 i14->h5 i15->h5
## 0.00 0.00 0.00 -0.03 0.00 0.00 0.00 0.00
## i16->h5 i17->h5 i18->h5 i19->h5 i20->h5 i21->h5 i22->h5 i23->h5
## 0.00 0.00 0.00 0.00 0.00 -0.01 0.00 0.00
## i24->h5 i25->h5 i26->h5 i27->h5 i28->h5 i29->h5 i30->h5 i31->h5
## 0.00 0.00 0.00 0.00 -0.02 0.00 0.00 0.01
## i32->h5 i33->h5 i34->h5 i35->h5 i36->h5 i37->h5 i38->h5 i39->h5
## 0.00 -0.01 0.00 0.00 0.00 -0.01 0.00 0.01
## i40->h5 i41->h5 i42->h5 i43->h5 i44->h5 i45->h5 i46->h5 i47->h5
## -0.01 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i48->h5 i49->h5 i50->h5 i51->h5 i52->h5 i53->h5 i54->h5 i55->h5
## 0.00 0.00 0.00 0.01 -0.01 0.00 0.00 0.00
## i56->h5 i57->h5 i58->h5 i59->h5 i60->h5 i61->h5 i62->h5 i63->h5
## 0.00 0.00 0.00 -0.01 0.00 0.01 0.00 0.00
## i64->h5 i65->h5 i66->h5 i67->h5 i68->h5 i69->h5 i70->h5 i71->h5
## 0.75 0.02 -0.13 0.00 0.00 0.00 0.00 0.00
## i72->h5 i73->h5 i74->h5 i75->h5 i76->h5 i77->h5 i78->h5 i79->h5
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i80->h5 i81->h5 i82->h5 i83->h5 i84->h5 i85->h5 i86->h5 i87->h5
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i88->h5 i89->h5 i90->h5 i91->h5 i92->h5 i93->h5 i94->h5 i95->h5
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i96->h5 i97->h5 i98->h5 i99->h5 i100->h5 i101->h5 i102->h5 i103->h5
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i104->h5 i105->h5 i106->h5 i107->h5 i108->h5
## 0.00 0.00 0.00 0.00 0.00
## b->h6 i1->h6 i2->h6 i3->h6 i4->h6 i5->h6 i6->h6 i7->h6
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i8->h6 i9->h6 i10->h6 i11->h6 i12->h6 i13->h6 i14->h6 i15->h6
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i16->h6 i17->h6 i18->h6 i19->h6 i20->h6 i21->h6 i22->h6 i23->h6
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i24->h6 i25->h6 i26->h6 i27->h6 i28->h6 i29->h6 i30->h6 i31->h6
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i32->h6 i33->h6 i34->h6 i35->h6 i36->h6 i37->h6 i38->h6 i39->h6
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i40->h6 i41->h6 i42->h6 i43->h6 i44->h6 i45->h6 i46->h6 i47->h6
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i48->h6 i49->h6 i50->h6 i51->h6 i52->h6 i53->h6 i54->h6 i55->h6
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i56->h6 i57->h6 i58->h6 i59->h6 i60->h6 i61->h6 i62->h6 i63->h6
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i64->h6 i65->h6 i66->h6 i67->h6 i68->h6 i69->h6 i70->h6 i71->h6
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i72->h6 i73->h6 i74->h6 i75->h6 i76->h6 i77->h6 i78->h6 i79->h6
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i80->h6 i81->h6 i82->h6 i83->h6 i84->h6 i85->h6 i86->h6 i87->h6
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i88->h6 i89->h6 i90->h6 i91->h6 i92->h6 i93->h6 i94->h6 i95->h6
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i96->h6 i97->h6 i98->h6 i99->h6 i100->h6 i101->h6 i102->h6 i103->h6
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i104->h6 i105->h6 i106->h6 i107->h6 i108->h6
## 0.00 0.00 0.00 0.00 0.00
## b->h7 i1->h7 i2->h7 i3->h7 i4->h7 i5->h7 i6->h7 i7->h7
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i8->h7 i9->h7 i10->h7 i11->h7 i12->h7 i13->h7 i14->h7 i15->h7
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i16->h7 i17->h7 i18->h7 i19->h7 i20->h7 i21->h7 i22->h7 i23->h7
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i24->h7 i25->h7 i26->h7 i27->h7 i28->h7 i29->h7 i30->h7 i31->h7
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i32->h7 i33->h7 i34->h7 i35->h7 i36->h7 i37->h7 i38->h7 i39->h7
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i40->h7 i41->h7 i42->h7 i43->h7 i44->h7 i45->h7 i46->h7 i47->h7
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i48->h7 i49->h7 i50->h7 i51->h7 i52->h7 i53->h7 i54->h7 i55->h7
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i56->h7 i57->h7 i58->h7 i59->h7 i60->h7 i61->h7 i62->h7 i63->h7
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i64->h7 i65->h7 i66->h7 i67->h7 i68->h7 i69->h7 i70->h7 i71->h7
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i72->h7 i73->h7 i74->h7 i75->h7 i76->h7 i77->h7 i78->h7 i79->h7
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i80->h7 i81->h7 i82->h7 i83->h7 i84->h7 i85->h7 i86->h7 i87->h7
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i88->h7 i89->h7 i90->h7 i91->h7 i92->h7 i93->h7 i94->h7 i95->h7
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i96->h7 i97->h7 i98->h7 i99->h7 i100->h7 i101->h7 i102->h7 i103->h7
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i104->h7 i105->h7 i106->h7 i107->h7 i108->h7
## 0.00 0.00 0.00 0.00 0.00
## b->o h1->o h2->o h3->o h4->o h5->o h6->o h7->o
## -0.10 -1.01 1.68 0.02 -0.07 1.07 0.00 -0.11
# Variable-importance plot for the node-3 TDA-assisted network, showing up to
# 50 predictors. `num_features` is named explicitly instead of relying on
# argument position, and the plot title typo ("Assited") is corrected.
vip(Adult_TDA_KDE_5.60.5_n3_NN1Fit0, num_features = 50L) +
  ggtitle("Adult_TDA_KDE_5.60.5_n3_NN1Fit TDA-Assisted NN")

# Score the test data with the node-3 TDA/KDE-assisted network
pred0 <- predict(
  Adult_TDA_KDE_5.60.5_n3_NN1Fit0,
  newdata = adult.one_hot_df4Test
)
# Cross-tabulate the predictions against the observed test labels and print
# the resulting confusion matrix with its summary statistics
ad_tda_kde_5.60.5_n3_nn1_cf0 <- confusionMatrix(
  data = pred0,
  reference = as.factor(adult.one_hot_df4Test$adult_df1)
)
print(ad_tda_kde_5.60.5_n3_nn1_cf0)
## Confusion Matrix and Statistics
##
## Reference
## Prediction <=50K >50K
## <=50K 7147 1675
## >50K 269 677
##
## Accuracy : 0.801
## 95% CI : (0.7929, 0.8089)
## No Information Rate : 0.7592
## P-Value [Acc > NIR] : < 2.2e-16
##
## Kappa : 0.3161
##
## Mcnemar's Test P-Value : < 2.2e-16
##
## Sensitivity : 0.9637
## Specificity : 0.2878
## Pos Pred Value : 0.8101
## Neg Pred Value : 0.7156
## Prevalence : 0.7592
## Detection Rate : 0.7317
## Detection Prevalence : 0.9032
## Balanced Accuracy : 0.6258
##
## 'Positive' Class : <=50K
##
# Second print of the same confusion matrix (its recorded output is identical
# to the previous print)
print(ad_tda_kde_5.60.5_n3_nn1_cf0)
## Confusion Matrix and Statistics
##
## Reference
## Prediction <=50K >50K
## <=50K 7147 1675
## >50K 269 677
##
## Accuracy : 0.801
## 95% CI : (0.7929, 0.8089)
## No Information Rate : 0.7592
## P-Value [Acc > NIR] : < 2.2e-16
##
## Kappa : 0.3161
##
## Mcnemar's Test P-Value : < 2.2e-16
##
## Sensitivity : 0.9637
## Specificity : 0.2878
## Pos Pred Value : 0.8101
## Neg Pred Value : 0.7156
## Prevalence : 0.7592
## Detection Rate : 0.7317
## Detection Prevalence : 0.9032
## Balanced Accuracy : 0.6258
##
## 'Positive' Class : <=50K
##
# Overall test-set statistics (accuracy, kappa, accuracy bounds, p-values)
print(ad_tda_kde_5.60.5_n3_nn1_cf0$overall)
## Accuracy Kappa AccuracyLower AccuracyUpper AccuracyNull
## 8.009828e-01 3.160787e-01 7.929246e-01 8.088611e-01 7.592138e-01
## AccuracyPValue McnemarPValue
## 3.424777e-23 7.890674e-223
# Store the overall test-set accuracy as a named length-one vector;
# "Accuracy" is the first element of `overall`, so this matches `[1]`.
ad_tda_kde_5.60.5_n3_nn1_cf0_ov_acc <-
  ad_tda_kde_5.60.5_n3_nn1_cf0$overall["Accuracy"]
# Per-class statistics of the confusion matrix
print(ad_tda_kde_5.60.5_n3_nn1_cf0$byClass)
## Sensitivity Specificity Pos Pred Value
## 0.9637271 0.2878401 0.8101338
## Neg Pred Value Precision Recall
## 0.7156448 0.8101338 0.9637271
## F1 Prevalence Detection Rate
## 0.8802808 0.7592138 0.7316749
## Detection Prevalence Balanced Accuracy
## 0.9031532 0.6257836
# Keep precision, recall and F1 (elements 5 to 7 of `byClass`)
ad_tda_kde_5.60.5_n3_nn1_cf0_pre_rec_f1 <-
  ad_tda_kde_5.60.5_n3_nn1_cf0$byClass[c("Precision", "Recall", "F1")]
###### Conduct initial Bayesian tests of non-tda-assisted NN vs. tda-assisted NN classifiers
### 3-fold diff
# Fold-wise accuracy difference: baseline network minus TDA-assisted network.
# NOTE(review): the subtraction pairs rows by position, and the TDA-assisted
# resample table lists its folds as Fold2, Fold1, Fold3 -- confirm that
# `ad_nn1_fit_re` uses the same fold order.
diff_tda_kde_5.60.5_nn1_n3_3_fold <-
  ad_nn1_fit_re - ad_tda_kde_5.60.5_n3_nn1_fit_re
print(diff_tda_kde_5.60.5_nn1_n3_3_fold)
## Accuracy
## 1 -0.02108613
## 2 0.07397273
## 3 -0.03125980
## Bayesian Tests 3-fold diff
# Bayesian sign test on the fold-wise differences; -0.01 and 0.01 are
# presumably the lower/upper ROPE bounds (the result reports left / rope /
# right probabilities) -- TODO confirm against the function definition.
bst_tda_kde_5.60.5_nn1.n3_3_fold <- BayesianSignTest(
  as.matrix(diff_tda_kde_5.60.5_nn1_n3_3_fold),
  -0.01,
  0.01
)
print(bst_tda_kde_5.60.5_nn1.n3_3_fold)
## $probLeft
## [1] 0.5
##
## $probRope
## [1] 0.25
##
## $probRight
## [1] 0.25
# Odds of "left" versus "right" from the sign test: probLeft / probRight
bst_tda_kde_5.60.5_nn1.n3_3_fold_odds.left <- with(
  bst_tda_kde_5.60.5_nn1.n3_3_fold,
  probLeft / probRight
)
print(bst_tda_kde_5.60.5_nn1.n3_3_fold_odds.left)
## [1] 2
# Bayesian signed-rank test on the same fold-wise differences, called with
# the same -0.01 / 0.01 bounds as the sign test
bsr_tda_kde_5.60.5_nn1.n3_3_fold <- BayesianSignedRank(
  as.matrix(diff_tda_kde_5.60.5_nn1_n3_3_fold),
  -0.01,
  0.01
)
print(bsr_tda_kde_5.60.5_nn1.n3_3_fold)
## $winLeft
## [1] 0.5385667
##
## $winRope
## [1] 0.01733333
##
## $winRight
## [1] 0.4441
# Bayesian correlated t-test on the fold-wise differences.
# NOTE(review): the second argument (0.1) looks like the cross-validation
# correlation rho. The usual heuristic for k-fold CV is 1 / k, which would be
# 1 / 3 for the 3-fold resampling used here -- confirm against the definition
# of correlatedBayesianTtest before changing it.
bct_tda_kde_5.60.5_nn1.n3_3_fold <- correlatedBayesianTtest(
  as.matrix(diff_tda_kde_5.60.5_nn1_n3_3_fold),
  0.1,
  -0.01,
  0.01
)
print(bct_tda_kde_5.60.5_nn1.n3_3_fold)
## $left
## [1] 0.3500051
##
## $rope
## [1] 0.1754636
##
## $right
## [1] 0.4745313
# Plot the fold-wise differences against the (-0.01, 0.01) ROPE interval
plot(rope(diff_tda_kde_5.60.5_nn1_n3_3_fold, range = c(-0.01, 0.01)))

# BayesFactor (left disabled)
#bf_tda_kde_5.60.5_nn1.n3_3_fold = ttestBF(x = as.matrix(diff_tda_kde_5.60.5_nn1_n3_3_fold))
#bf_tda_kde_5.60.5_nn1.n3_3_fold
# One-sample t-test of the fold-wise differences
print(t.test(as.matrix(diff_tda_kde_5.60.5_nn1_n3_3_fold)))
##
## One Sample t-test
##
## data: as.matrix(diff_tda_kde_5.60.5_nn1_n3_3_fold)
## t = 0.21512, df = 2, p-value = 0.8496
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
## -0.1369766 0.1513944
## sample estimates:
## mean of x
## 0.007208932
### Test set diff
# Test-set accuracy difference: baseline network minus TDA-assisted network.
# Both operands are single accuracy values, so the result is one number.
diff_tda_kde_5.60.5_nn1.n3_test <-
  nn1_cf_ov_acc - ad_tda_kde_5.60.5_n3_nn1_cf0_ov_acc
print(diff_tda_kde_5.60.5_nn1.n3_test)
## Accuracy
## 0.004095004
## Bayesian Tests Test set diff
# Bayesian sign test on the single test-set difference, with the same
# -0.01 / 0.01 bounds used for the fold-wise tests
bst_tda_kde_5.60.5_nn1.n3_test <- BayesianSignTest(
  as.matrix(diff_tda_kde_5.60.5_nn1.n3_test),
  -0.01,
  0.01
)
print(bst_tda_kde_5.60.5_nn1.n3_test)
## $probLeft
## [1] 0
##
## $probRope
## [1] 1
##
## $probRight
## [1] 0
# Odds of "left" versus "right" from the sign test: probLeft / probRight.
# NOTE(review): in the recorded run both probabilities are 0, so this ratio
# is 0 / 0 and evaluates to NaN.
bst_tda_kde_5.60.5_nn1.n3_test_odds.left <- with(
  bst_tda_kde_5.60.5_nn1.n3_test,
  probLeft / probRight
)
print(bst_tda_kde_5.60.5_nn1.n3_test_odds.left)
## [1] NaN
# Bayesian signed-rank test on the single test-set difference
bsr_tda_kde_5.60.5_nn1.n3_test <- BayesianSignedRank(
  as.matrix(diff_tda_kde_5.60.5_nn1.n3_test),
  -0.01,
  0.01
)
print(bsr_tda_kde_5.60.5_nn1.n3_test)
## $winLeft
## [1] 0
##
## $winRope
## [1] 1
##
## $winRight
## [1] 0
# Bayesian correlated t-test on the single test-set difference.
# NOTE(review): the recorded result is NA for left, rope and right; the input
# holds only one value, which is presumably why -- confirm.
bct_tda_kde_5.60.5_nn1.n3_test <- correlatedBayesianTtest(
  as.matrix(diff_tda_kde_5.60.5_nn1.n3_test),
  0.1,
  -0.01,
  0.01
)
print(bct_tda_kde_5.60.5_nn1.n3_test)
## $left
## [1] NA
##
## $rope
## [1] NA
##
## $right
## [1] NA
# Rope Plot
# (the remaining test-set checks are disabled, presumably because the
# test-set difference is a single value)
#plot(rope(diff_tda_kde_5.60.5_nn1.n3_test))
#BayesFactor
#bf_tda_kde_5.60.5_nn1.n3_test = ttestBF(x = as.matrix(diff_tda_kde_5.60.5_nn1.n3_test))
#bf_tda_kde_5.60.5_nn1.n3_test
#t_test
#t.test(as.matrix(diff_tda_kde_5.60.5_nn1.n3_test))
## Node4
# Neural Network 1: tune an nnet classifier for `adult_df1` on the node-4
# TDA/KDE data, using `fitControl` for resampling and `nn1Grid` as the tuning
# grid (both defined elsewhere in the file); the model is selected on Accuracy.
# NOTE(review): `Importance` is not a documented argument of nnet::nnet();
# presumably it is forwarded through `...` and ignored -- confirm before
# relying on it.
# NOTE(review): most fits in the training log end with "stopped after 100
# iterations"; consider whether an explicit `maxit` should be passed.
Adult_TDA_KDE_5.60.5_n4_NN1Fit0 <- train(
  as.factor(adult_df1) ~ .,
  data = tda.m_kde_adult_5.60.5.n4.vec,
  Importance = TRUE, # spelled out: `T` is an ordinary variable and can be reassigned
  method = "nnet",
  trControl = fitControl,
  tuneGrid = nn1Grid,
  metric = "Accuracy"
)
## # weights: 221
## initial value 4743.562112
## iter 10 value 4082.003154
## iter 20 value 3844.886381
## iter 30 value 3844.320194
## iter 40 value 3844.037795
## iter 50 value 3783.250142
## iter 60 value 3779.777945
## iter 70 value 3764.399000
## iter 80 value 3732.849412
## iter 90 value 3726.031070
## iter 100 value 3705.929119
## final value 3705.929119
## stopped after 100 iterations
## # weights: 331
## initial value 4881.123440
## iter 10 value 3813.282267
## iter 20 value 3774.904418
## iter 30 value 3773.126152
## iter 40 value 3736.299792
## iter 50 value 3718.934374
## iter 60 value 3711.839847
## iter 70 value 3456.321896
## iter 80 value 3259.873230
## iter 90 value 3031.649835
## iter 100 value 2745.486028
## final value 2745.486028
## stopped after 100 iterations
## # weights: 551
## initial value 5891.026768
## iter 10 value 4059.191814
## iter 20 value 3789.088050
## iter 30 value 3669.745964
## iter 40 value 3270.470855
## iter 50 value 2870.061515
## iter 60 value 2768.173411
## iter 70 value 2492.653057
## iter 80 value 2326.572339
## iter 90 value 2310.893797
## iter 100 value 2299.913232
## final value 2299.913232
## stopped after 100 iterations
## # weights: 771
## initial value 4580.340904
## iter 10 value 4081.992309
## final value 4081.990352
## converged
## # weights: 221
## initial value 6274.289002
## iter 10 value 4082.543061
## iter 20 value 4082.211973
## final value 4082.171039
## converged
## # weights: 331
## initial value 10062.451062
## iter 10 value 3823.301649
## iter 20 value 3722.726526
## iter 30 value 3709.137477
## iter 40 value 3647.608720
## iter 50 value 3460.656553
## iter 60 value 2953.431424
## iter 70 value 2837.823765
## iter 80 value 2623.790240
## iter 90 value 2507.191775
## iter 100 value 2413.588717
## final value 2413.588717
## stopped after 100 iterations
## # weights: 551
## initial value 7277.957005
## iter 10 value 3891.641007
## iter 20 value 3786.477019
## iter 30 value 3763.987313
## final value 3763.109701
## converged
## # weights: 771
## initial value 9755.597945
## iter 10 value 4023.761348
## iter 20 value 3794.467576
## iter 30 value 3692.551294
## iter 40 value 3557.161583
## iter 50 value 3026.430714
## iter 60 value 2912.210085
## iter 70 value 2867.342680
## iter 80 value 2745.729544
## iter 90 value 2695.306956
## iter 100 value 2690.323351
## final value 2690.323351
## stopped after 100 iterations
## # weights: 221
## initial value 4612.842813
## iter 10 value 4083.028369
## iter 20 value 4082.487665
## iter 30 value 4082.481431
## iter 30 value 4082.481405
## iter 30 value 4082.481404
## final value 4082.481404
## converged
## # weights: 331
## initial value 7186.646190
## iter 10 value 4082.768691
## iter 20 value 4082.586593
## iter 30 value 4069.307946
## iter 40 value 4004.307698
## iter 50 value 3866.971017
## iter 60 value 3788.981353
## iter 70 value 3785.178299
## iter 80 value 3779.044680
## iter 90 value 3771.611214
## iter 100 value 3730.803600
## final value 3730.803600
## stopped after 100 iterations
## # weights: 551
## initial value 6627.454950
## iter 10 value 4077.659143
## iter 20 value 3760.517297
## iter 30 value 3731.633755
## iter 40 value 3729.421207
## iter 50 value 3722.272148
## iter 60 value 3691.391110
## iter 70 value 3660.484868
## iter 80 value 3587.679656
## iter 90 value 3358.442267
## iter 100 value 2541.320297
## final value 2541.320297
## stopped after 100 iterations
## # weights: 771
## initial value 5052.436394
## iter 10 value 3869.525176
## iter 20 value 3855.668289
## iter 30 value 3751.854962
## iter 40 value 3736.770012
## iter 50 value 3731.312462
## iter 60 value 3728.155674
## iter 70 value 3699.681084
## iter 80 value 3688.224898
## iter 90 value 3637.673426
## iter 100 value 3579.925828
## final value 3579.925828
## stopped after 100 iterations
## # weights: 221
## initial value 5740.283044
## iter 10 value 4079.855858
## iter 20 value 3905.420754
## iter 30 value 3734.218084
## iter 40 value 3245.809205
## iter 50 value 2978.356853
## iter 60 value 2742.158672
## iter 70 value 2719.922371
## iter 80 value 2587.955475
## iter 90 value 2436.835439
## iter 100 value 2389.334182
## final value 2389.334182
## stopped after 100 iterations
## # weights: 331
## initial value 4453.427373
## iter 10 value 4082.017777
## iter 20 value 4081.903352
## final value 4081.902076
## converged
## # weights: 551
## initial value 9364.244009
## iter 10 value 4090.885490
## iter 20 value 4056.917891
## iter 30 value 3831.360228
## iter 40 value 3784.967191
## iter 50 value 3780.303587
## iter 60 value 3742.201003
## iter 70 value 3697.443780
## iter 80 value 3623.819339
## iter 90 value 3465.637717
## iter 100 value 2909.213649
## final value 2909.213649
## stopped after 100 iterations
## # weights: 771
## initial value 10054.734148
## iter 10 value 3982.413515
## iter 20 value 3895.971694
## iter 30 value 3775.974744
## iter 40 value 3749.857067
## iter 50 value 3740.956055
## iter 60 value 3734.485929
## iter 70 value 3731.233527
## iter 80 value 3729.940651
## iter 90 value 3727.750446
## iter 100 value 3727.087426
## final value 3727.087426
## stopped after 100 iterations
## # weights: 221
## initial value 6223.342447
## iter 10 value 3955.217098
## iter 20 value 3818.442258
## iter 30 value 3813.421305
## iter 40 value 3771.707462
## iter 50 value 3736.029807
## iter 60 value 3632.809247
## iter 70 value 2913.942291
## iter 80 value 2647.253635
## iter 90 value 2588.944170
## iter 100 value 2549.036549
## final value 2549.036549
## stopped after 100 iterations
## # weights: 331
## initial value 9085.188073
## iter 10 value 4082.543284
## iter 20 value 4082.113294
## iter 30 value 3842.205736
## iter 40 value 3815.874625
## iter 50 value 3808.974865
## iter 60 value 3761.939340
## iter 70 value 3743.463180
## iter 80 value 3734.212489
## iter 90 value 3728.382805
## iter 100 value 3723.717871
## final value 3723.717871
## stopped after 100 iterations
## # weights: 551
## initial value 6178.194311
## iter 10 value 4040.383968
## iter 20 value 3934.259973
## iter 30 value 3922.990568
## iter 40 value 3920.209484
## iter 50 value 3816.068530
## iter 60 value 3774.172993
## iter 70 value 3769.672907
## iter 80 value 3708.567315
## iter 90 value 3566.802374
## iter 100 value 3323.126554
## final value 3323.126554
## stopped after 100 iterations
## # weights: 771
## initial value 9089.230103
## iter 10 value 4060.475745
## iter 20 value 3803.913792
## iter 30 value 3740.215273
## iter 40 value 3698.896549
## iter 50 value 3599.484515
## iter 60 value 3485.602587
## iter 70 value 3174.190943
## iter 80 value 2765.455482
## iter 90 value 2600.908599
## iter 100 value 2463.561005
## final value 2463.561005
## stopped after 100 iterations
## # weights: 221
## initial value 4496.364181
## iter 10 value 4082.831924
## iter 20 value 3905.641377
## iter 30 value 3808.390514
## iter 40 value 3764.442209
## iter 50 value 3703.971843
## iter 60 value 3508.043469
## iter 70 value 3285.345028
## iter 80 value 2895.756325
## iter 90 value 2759.482187
## iter 100 value 2677.279603
## final value 2677.279603
## stopped after 100 iterations
## # weights: 331
## initial value 6718.664622
## iter 10 value 3996.695267
## iter 20 value 3882.365605
## iter 30 value 3809.854339
## iter 40 value 3738.372982
## iter 50 value 3643.473511
## iter 60 value 3121.196685
## iter 70 value 2664.486378
## iter 80 value 2624.555307
## iter 90 value 2571.585237
## iter 100 value 2502.716716
## final value 2502.716716
## stopped after 100 iterations
## # weights: 551
## initial value 7380.678265
## iter 10 value 3855.505147
## iter 20 value 3773.692743
## iter 30 value 3689.249757
## iter 40 value 3544.052897
## iter 50 value 3418.691917
## iter 60 value 3295.138228
## iter 70 value 3253.934617
## iter 80 value 3055.358845
## iter 90 value 2874.229190
## iter 100 value 2817.840818
## final value 2817.840818
## stopped after 100 iterations
## # weights: 771
## initial value 4590.139347
## iter 10 value 4034.644500
## iter 20 value 3840.001780
## iter 30 value 3804.295174
## iter 40 value 3720.367578
## iter 50 value 3686.643967
## iter 60 value 3626.885537
## iter 70 value 3366.394589
## iter 80 value 3083.565173
## iter 90 value 2639.047180
## iter 100 value 2476.827558
## final value 2476.827558
## stopped after 100 iterations
## # weights: 221
## initial value 4207.730003
## iter 10 value 4082.012952
## iter 20 value 4081.921824
## final value 4081.920666
## converged
## # weights: 331
## initial value 4475.383261
## iter 10 value 3908.900743
## iter 20 value 3828.086162
## iter 30 value 3785.148280
## iter 40 value 3762.043731
## iter 50 value 3751.673952
## iter 60 value 3749.836138
## iter 70 value 3743.668912
## iter 80 value 3743.122154
## iter 90 value 3740.635855
## iter 100 value 3736.826256
## final value 3736.826256
## stopped after 100 iterations
## # weights: 551
## initial value 5689.053098
## iter 10 value 3804.821458
## iter 20 value 3728.690682
## iter 30 value 3690.651929
## iter 40 value 3635.726177
## iter 50 value 3462.349271
## iter 60 value 3286.900173
## iter 70 value 3105.164142
## iter 80 value 2676.477266
## iter 90 value 2442.431456
## iter 100 value 2338.344287
## final value 2338.344287
## stopped after 100 iterations
## # weights: 771
## initial value 7807.700386
## final value 4081.750420
## converged
## # weights: 221
## initial value 8131.142158
## iter 10 value 3791.370932
## iter 20 value 3763.441110
## iter 30 value 3762.353982
## iter 40 value 3762.309245
## final value 3762.309089
## converged
## # weights: 331
## initial value 8192.078947
## iter 10 value 3972.203193
## iter 20 value 3768.172303
## iter 30 value 3737.419816
## iter 40 value 3288.862836
## iter 50 value 2826.048806
## iter 60 value 2632.029180
## iter 70 value 2597.225872
## iter 80 value 2490.832581
## iter 90 value 2452.986066
## iter 100 value 2414.083114
## final value 2414.083114
## stopped after 100 iterations
## # weights: 551
## initial value 4125.200094
## iter 10 value 4063.861516
## iter 20 value 3867.144815
## iter 30 value 3830.535774
## iter 40 value 3701.743527
## iter 50 value 3483.888867
## iter 60 value 3208.265277
## iter 70 value 2635.220850
## iter 80 value 2495.172387
## iter 90 value 2407.398453
## iter 100 value 2376.738774
## final value 2376.738774
## stopped after 100 iterations
## # weights: 771
## initial value 5422.578376
## iter 10 value 4069.522731
## iter 20 value 3889.263104
## iter 30 value 3430.805763
## iter 40 value 3186.929397
## iter 50 value 2843.902184
## iter 60 value 2795.277351
## iter 70 value 2690.908599
## iter 80 value 2653.880944
## iter 90 value 2632.614614
## iter 100 value 2576.365964
## final value 2576.365964
## stopped after 100 iterations
## # weights: 221
## initial value 5926.801002
## iter 10 value 4080.844926
## iter 20 value 4070.640191
## iter 30 value 3797.540431
## iter 40 value 3786.783734
## iter 50 value 3617.975538
## iter 60 value 3338.522930
## iter 70 value 3263.627306
## iter 80 value 3190.614989
## iter 90 value 3054.237061
## iter 100 value 2797.254140
## final value 2797.254140
## stopped after 100 iterations
## # weights: 331
## initial value 7343.751275
## iter 10 value 4034.308925
## iter 20 value 3767.539698
## iter 30 value 3672.521884
## iter 40 value 3362.832872
## iter 50 value 3019.600773
## iter 60 value 2651.204333
## iter 70 value 2501.969169
## iter 80 value 2464.661990
## iter 90 value 2405.899350
## iter 100 value 2387.633689
## final value 2387.633689
## stopped after 100 iterations
## # weights: 551
## initial value 5748.819906
## iter 10 value 3998.438471
## iter 20 value 3811.921179
## iter 30 value 3780.170725
## iter 40 value 3758.960497
## iter 50 value 3752.140999
## iter 60 value 3751.842706
## iter 70 value 3738.291923
## iter 80 value 3702.783840
## iter 90 value 3586.474064
## iter 100 value 3195.829757
## final value 3195.829757
## stopped after 100 iterations
## # weights: 771
## initial value 4316.995865
## iter 10 value 4081.889288
## final value 4081.886060
## converged
## # weights: 551
## initial value 7881.622535
## iter 10 value 5730.296369
## iter 20 value 5664.176911
## iter 30 value 5626.830601
## iter 40 value 5564.105803
## iter 50 value 5527.942097
## iter 60 value 5147.529175
## iter 70 value 4587.563972
## iter 80 value 4021.236043
## iter 90 value 3724.097789
## iter 100 value 3647.224599
## final value 3647.224599
## stopped after 100 iterations
# Print the caret training summary (resampling results for each size/decay pair)
Adult_TDA_KDE_5.60.5_n4_NN1Fit0
## Neural Network
##
## 11795 samples
## 108 predictor
## 2 classes: ' <=50K', ' >50K'
##
## No pre-processing
## Resampling: Cross-Validated (3 fold)
## Summary of sample sizes: 7864, 7863, 7863
## Resampling results across tuning parameters:
##
## size decay Accuracy Kappa
## 2 0.3 0.8197537 0.25489444
## 2 0.5 0.8150887 0.24152677
## 2 0.7 0.8192426 0.34595694
## 3 0.3 0.8111086 0.23362743
## 3 0.5 0.8404417 0.44133030
## 3 0.7 0.8413710 0.42815038
## 5 0.3 0.8425615 0.44621056
## 5 0.5 0.8290788 0.40330614
## 5 0.7 0.8408655 0.44821064
## 7 0.3 0.7965231 0.07841179
## 7 0.5 0.8400148 0.44721166
## 7 0.7 0.8178032 0.27371827
##
## Accuracy was used to select the optimal model using the largest value.
## The final values used for the model were size = 5 and decay = 0.3.
# Per-fold accuracy and kappa of the selected model
Adult_TDA_KDE_5.60.5_n4_NN1Fit0$resample
## Accuracy Kappa Resample
## 1 0.8560163 0.5501062 Fold1
## 2 0.8530010 0.5447380 Fold3
## 3 0.8186673 0.2437874 Fold2
# Keep the fold accuracies as a one-column data frame for the fold-wise
# comparison that follows.
# NOTE(review): rows are in caret's order (Fold1, Fold3, Fold2 above), not by
# fold number -- confirm the object this is subtracted from uses the same order.
ad_tda_kde_5.60.5_n4_nn1_fit_re <-
  Adult_TDA_KDE_5.60.5_n4_NN1Fit0$resample["Accuracy"]
# Show the architecture and weights of the final network
summary(Adult_TDA_KDE_5.60.5_n4_NN1Fit0)
## a 108-5-1 network with 551 weights
## options were - entropy fitting decay=0.3
## b->h1 i1->h1 i2->h1 i3->h1 i4->h1 i5->h1 i6->h1 i7->h1
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i8->h1 i9->h1 i10->h1 i11->h1 i12->h1 i13->h1 i14->h1 i15->h1
## 0.00 0.00 0.00 0.01 0.00 0.00 0.00 0.00
## i16->h1 i17->h1 i18->h1 i19->h1 i20->h1 i21->h1 i22->h1 i23->h1
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i24->h1 i25->h1 i26->h1 i27->h1 i28->h1 i29->h1 i30->h1 i31->h1
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i32->h1 i33->h1 i34->h1 i35->h1 i36->h1 i37->h1 i38->h1 i39->h1
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i40->h1 i41->h1 i42->h1 i43->h1 i44->h1 i45->h1 i46->h1 i47->h1
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i48->h1 i49->h1 i50->h1 i51->h1 i52->h1 i53->h1 i54->h1 i55->h1
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i56->h1 i57->h1 i58->h1 i59->h1 i60->h1 i61->h1 i62->h1 i63->h1
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i64->h1 i65->h1 i66->h1 i67->h1 i68->h1 i69->h1 i70->h1 i71->h1
## -0.01 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i72->h1 i73->h1 i74->h1 i75->h1 i76->h1 i77->h1 i78->h1 i79->h1
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i80->h1 i81->h1 i82->h1 i83->h1 i84->h1 i85->h1 i86->h1 i87->h1
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i88->h1 i89->h1 i90->h1 i91->h1 i92->h1 i93->h1 i94->h1 i95->h1
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i96->h1 i97->h1 i98->h1 i99->h1 i100->h1 i101->h1 i102->h1 i103->h1
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i104->h1 i105->h1 i106->h1 i107->h1 i108->h1
## 0.00 0.00 0.00 0.00 0.00
## b->h2 i1->h2 i2->h2 i3->h2 i4->h2 i5->h2 i6->h2 i7->h2
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i8->h2 i9->h2 i10->h2 i11->h2 i12->h2 i13->h2 i14->h2 i15->h2
## 0.00 0.00 0.00 0.05 0.00 0.00 0.00 0.00
## i16->h2 i17->h2 i18->h2 i19->h2 i20->h2 i21->h2 i22->h2 i23->h2
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i24->h2 i25->h2 i26->h2 i27->h2 i28->h2 i29->h2 i30->h2 i31->h2
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i32->h2 i33->h2 i34->h2 i35->h2 i36->h2 i37->h2 i38->h2 i39->h2
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i40->h2 i41->h2 i42->h2 i43->h2 i44->h2 i45->h2 i46->h2 i47->h2
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i48->h2 i49->h2 i50->h2 i51->h2 i52->h2 i53->h2 i54->h2 i55->h2
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i56->h2 i57->h2 i58->h2 i59->h2 i60->h2 i61->h2 i62->h2 i63->h2
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i64->h2 i65->h2 i66->h2 i67->h2 i68->h2 i69->h2 i70->h2 i71->h2
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i72->h2 i73->h2 i74->h2 i75->h2 i76->h2 i77->h2 i78->h2 i79->h2
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i80->h2 i81->h2 i82->h2 i83->h2 i84->h2 i85->h2 i86->h2 i87->h2
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i88->h2 i89->h2 i90->h2 i91->h2 i92->h2 i93->h2 i94->h2 i95->h2
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i96->h2 i97->h2 i98->h2 i99->h2 i100->h2 i101->h2 i102->h2 i103->h2
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i104->h2 i105->h2 i106->h2 i107->h2 i108->h2
## 0.00 0.00 0.00 0.00 0.00
## b->h3 i1->h3 i2->h3 i3->h3 i4->h3 i5->h3 i6->h3 i7->h3
## -0.34 -1.30 0.31 -0.05 -0.07 0.00 -0.61 0.00
## i8->h3 i9->h3 i10->h3 i11->h3 i12->h3 i13->h3 i14->h3 i15->h3
## 0.08 -0.01 0.00 0.00 0.00 0.00 0.00 0.00
## i16->h3 i17->h3 i18->h3 i19->h3 i20->h3 i21->h3 i22->h3 i23->h3
## 0.00 0.00 0.00 -0.02 0.02 -0.01 0.00 -0.17
## i24->h3 i25->h3 i26->h3 i27->h3 i28->h3 i29->h3 i30->h3 i31->h3
## 0.00 0.00 0.00 -0.16 -3.29 -0.10 0.00 0.58
## i32->h3 i33->h3 i34->h3 i35->h3 i36->h3 i37->h3 i38->h3 i39->h3
## 0.00 -1.13 0.33 -0.01 0.31 -0.55 0.00 -0.12
## i40->h3 i41->h3 i42->h3 i43->h3 i44->h3 i45->h3 i46->h3 i47->h3
## 0.32 0.07 -0.01 0.49 -0.88 0.18 0.36 0.00
## i48->h3 i49->h3 i50->h3 i51->h3 i52->h3 i53->h3 i54->h3 i55->h3
## -0.66 0.20 -0.05 0.84 -0.73 -0.02 0.00 -0.24
## i56->h3 i57->h3 i58->h3 i59->h3 i60->h3 i61->h3 i62->h3 i63->h3
## -0.19 -0.02 0.11 -0.18 0.00 -0.24 -0.93 0.59
## i64->h3 i65->h3 i66->h3 i67->h3 i68->h3 i69->h3 i70->h3 i71->h3
## 0.03 0.05 -1.70 -0.11 0.00 0.33 0.00 0.00
## i72->h3 i73->h3 i74->h3 i75->h3 i76->h3 i77->h3 i78->h3 i79->h3
## 0.00 0.00 0.00 0.00 -0.01 0.00 0.26 -0.01
## i80->h3 i81->h3 i82->h3 i83->h3 i84->h3 i85->h3 i86->h3 i87->h3
## 0.00 0.00 0.00 0.00 0.00 0.00 0.13 0.00
## i88->h3 i89->h3 i90->h3 i91->h3 i92->h3 i93->h3 i94->h3 i95->h3
## 0.12 -0.34 -0.01 0.00 0.00 0.00 0.00 0.00
## i96->h3 i97->h3 i98->h3 i99->h3 i100->h3 i101->h3 i102->h3 i103->h3
## 0.00 0.00 0.00 0.00 0.11 0.00 0.00 0.00
## i104->h3 i105->h3 i106->h3 i107->h3 i108->h3
## 0.00 0.00 -0.82 0.00 0.00
## b->h4 i1->h4 i2->h4 i3->h4 i4->h4 i5->h4 i6->h4 i7->h4
## -1.65 0.06 -0.27 0.71 -0.14 -0.20 -0.10 0.05
## i8->h4 i9->h4 i10->h4 i11->h4 i12->h4 i13->h4 i14->h4 i15->h4
## -0.75 -0.46 -0.49 0.00 0.00 0.00 -2.47 0.00
## i16->h4 i17->h4 i18->h4 i19->h4 i20->h4 i21->h4 i22->h4 i23->h4
## 0.00 0.00 0.00 0.13 -0.15 1.40 0.00 -1.24
## i24->h4 i25->h4 i26->h4 i27->h4 i28->h4 i29->h4 i30->h4 i31->h4
## 1.43 0.00 0.00 -0.74 -0.32 -5.95 1.55 1.53
## i32->h4 i33->h4 i34->h4 i35->h4 i36->h4 i37->h4 i38->h4 i39->h4
## 0.48 -0.09 -0.15 0.98 -0.47 0.11 -0.48 -0.06
## i40->h4 i41->h4 i42->h4 i43->h4 i44->h4 i45->h4 i46->h4 i47->h4
## 0.63 -0.86 -1.14 -0.11 -0.47 -0.87 0.77 0.60
## i48->h4 i49->h4 i50->h4 i51->h4 i52->h4 i53->h4 i54->h4 i55->h4
## 0.27 0.74 -0.32 0.54 0.17 -1.68 -0.85 -1.97
## i56->h4 i57->h4 i58->h4 i59->h4 i60->h4 i61->h4 i62->h4 i63->h4
## 2.14 -0.88 0.56 -0.29 -1.05 0.02 -1.33 -0.32
## i64->h4 i65->h4 i66->h4 i67->h4 i68->h4 i69->h4 i70->h4 i71->h4
## 0.00 0.00 0.03 0.20 0.87 0.45 -0.15 -0.63
## i72->h4 i73->h4 i74->h4 i75->h4 i76->h4 i77->h4 i78->h4 i79->h4
## 1.94 -0.26 -0.62 -0.07 0.19 1.08 0.71 -1.21
## i80->h4 i81->h4 i82->h4 i83->h4 i84->h4 i85->h4 i86->h4 i87->h4
## -0.52 -0.35 0.00 -0.07 -0.50 -0.90 -0.27 0.39
## i88->h4 i89->h4 i90->h4 i91->h4 i92->h4 i93->h4 i94->h4 i95->h4
## 1.87 -0.12 0.93 0.18 -0.81 -0.32 -0.40 -0.05
## i96->h4 i97->h4 i98->h4 i99->h4 i100->h4 i101->h4 i102->h4 i103->h4
## -0.37 0.78 -0.71 0.44 -0.35 -0.54 -0.24 -1.06
## i104->h4 i105->h4 i106->h4 i107->h4 i108->h4
## 0.16 -0.77 0.46 -0.63 -0.34
## b->h5 i1->h5 i2->h5 i3->h5 i4->h5 i5->h5 i6->h5 i7->h5
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i8->h5 i9->h5 i10->h5 i11->h5 i12->h5 i13->h5 i14->h5 i15->h5
## 0.00 0.00 0.00 -0.78 0.00 0.00 0.00 0.00
## i16->h5 i17->h5 i18->h5 i19->h5 i20->h5 i21->h5 i22->h5 i23->h5
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i24->h5 i25->h5 i26->h5 i27->h5 i28->h5 i29->h5 i30->h5 i31->h5
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i32->h5 i33->h5 i34->h5 i35->h5 i36->h5 i37->h5 i38->h5 i39->h5
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i40->h5 i41->h5 i42->h5 i43->h5 i44->h5 i45->h5 i46->h5 i47->h5
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i48->h5 i49->h5 i50->h5 i51->h5 i52->h5 i53->h5 i54->h5 i55->h5
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i56->h5 i57->h5 i58->h5 i59->h5 i60->h5 i61->h5 i62->h5 i63->h5
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i64->h5 i65->h5 i66->h5 i67->h5 i68->h5 i69->h5 i70->h5 i71->h5
## 0.14 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i72->h5 i73->h5 i74->h5 i75->h5 i76->h5 i77->h5 i78->h5 i79->h5
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i80->h5 i81->h5 i82->h5 i83->h5 i84->h5 i85->h5 i86->h5 i87->h5
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i88->h5 i89->h5 i90->h5 i91->h5 i92->h5 i93->h5 i94->h5 i95->h5
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i96->h5 i97->h5 i98->h5 i99->h5 i100->h5 i101->h5 i102->h5 i103->h5
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i104->h5 i105->h5 i106->h5 i107->h5 i108->h5
## 0.00 0.00 0.00 0.00 0.00
## b->o h1->o h2->o h3->o h4->o h5->o
## -3.62 0.09 -0.21 4.49 5.01 0.77
# Variable-importance plot of the top 50 predictors for the fitted network.
# The title previously misspelled "Assisted".
vip(Adult_TDA_KDE_5.60.5_n4_NN1Fit0, num_features = 50) +
  ggtitle("Adult_TDA_KDE_5.60.5_n4_NN1Fit TDA-Assisted NN")

# Score the test data with the network fitted on the training data
pred0 <- predict(
  Adult_TDA_KDE_5.60.5_n4_NN1Fit0,
  newdata = adult.one_hot_df4Test
)
# Cross-tabulate predictions against the observed labels to assess
# performance on the test data
ad_tda_kde_5.60.5_n4_nn1_cf0 <- confusionMatrix(
  data = pred0,
  reference = as.factor(adult.one_hot_df4Test$adult_df1)
)
ad_tda_kde_5.60.5_n4_nn1_cf0
## Confusion Matrix and Statistics
##
## Reference
## Prediction <=50K >50K
## <=50K 6295 826
## >50K 1121 1526
##
## Accuracy : 0.8007
## 95% CI : (0.7926, 0.8086)
## No Information Rate : 0.7592
## P-Value [Acc > NIR] : < 2.2e-16
##
## Kappa : 0.4772
##
## Mcnemar's Test P-Value : 2.684e-11
##
## Sensitivity : 0.8488
## Specificity : 0.6488
## Pos Pred Value : 0.8840
## Neg Pred Value : 0.5765
## Prevalence : 0.7592
## Detection Rate : 0.6445
## Detection Prevalence : 0.7290
## Balanced Accuracy : 0.7488
##
## 'Positive' Class : <=50K
##
# Display the confusion matrix again (same object as printed when it was created)
ad_tda_kde_5.60.5_n4_nn1_cf0
## Confusion Matrix and Statistics
##
## Reference
## Prediction <=50K >50K
## <=50K 6295 826
## >50K 1121 1526
##
## Accuracy : 0.8007
## 95% CI : (0.7926, 0.8086)
## No Information Rate : 0.7592
## P-Value [Acc > NIR] : < 2.2e-16
##
## Kappa : 0.4772
##
## Mcnemar's Test P-Value : 2.684e-11
##
## Sensitivity : 0.8488
## Specificity : 0.6488
## Pos Pred Value : 0.8840
## Neg Pred Value : 0.5765
## Prevalence : 0.7592
## Detection Rate : 0.6445
## Detection Prevalence : 0.7290
## Balanced Accuracy : 0.7488
##
## 'Positive' Class : <=50K
##
# Overall test-set statistics (accuracy, kappa, accuracy bounds, p-values)
ad_tda_kde_5.60.5_n4_nn1_cf0$overall
## Accuracy Kappa AccuracyLower AccuracyUpper AccuracyNull
## 8.006757e-01 4.772141e-01 7.926130e-01 8.085587e-01 7.592138e-01
## AccuracyPValue McnemarPValue
## 7.146690e-23 2.684466e-11
# Store the test-set accuracy (the first entry of `overall`) for the
# test-set comparison below
ad_tda_kde_5.60.5_n4_nn1_cf0_ov_acc <-
  ad_tda_kde_5.60.5_n4_nn1_cf0$overall["Accuracy"]
# Per-class statistics of the confusion matrix
ad_tda_kde_5.60.5_n4_nn1_cf0$byClass
## Sensitivity Specificity Pos Pred Value
## 0.8488403 0.6488095 0.8840051
## Neg Pred Value Precision Recall
## 0.5765017 0.8840051 0.8488403
## F1 Prevalence Detection Rate
## 0.8660659 0.7592138 0.6444513
## Detection Prevalence Balanced Accuracy
## 0.7290131 0.7488249
# Keep precision, recall and F1 (entries 5 to 7 of `byClass`)
ad_tda_kde_5.60.5_n4_nn1_cf0_pre_rec_f1 <-
  ad_tda_kde_5.60.5_n4_nn1_cf0$byClass[c("Precision", "Recall", "F1")]
###### Conduct initial Bayesian tests of non-tda-assisted NN vs. tda-assisted NN classifiers
### 3-fold diff
# Fold-wise accuracy difference: `ad_nn1_fit_re` (presumably the non-TDA NN1
# resample accuracies -- defined earlier in the file) minus the TDA-assisted
# NN1 accuracies
diff_tda_kde_5.60.5_nn1_n4_3_fold <-
  ad_nn1_fit_re - ad_tda_kde_5.60.5_n4_nn1_fit_re
diff_tda_kde_5.60.5_nn1_n4_3_fold
## Accuracy
## 1 -0.048065774
## 2 -0.002908888
## 3 -0.017272241
## Bayesian tests on the 3-fold differences
# Bayesian Sign Test; -0.01 and 0.01 are presumably the lower and upper
# ROPE bounds (confirm against the function definition)
bst_tda_kde_5.60.5_nn1.n4_3_fold <- BayesianSignTest(
  as.matrix(diff_tda_kde_5.60.5_nn1_n4_3_fold),
  -0.01,
  0.01
)
bst_tda_kde_5.60.5_nn1.n4_3_fold
## $probLeft
## [1] 0.5
##
## $probRope
## [1] 0.5
##
## $probRight
## [1] 0
# Odds Left Bayesian Sign Test: probLeft divided by probRight.
# The ratio is Inf when probRight is 0 and probLeft is positive.
bst_tda_kde_5.60.5_nn1.n4_3_fold_odds.left <- with(
  bst_tda_kde_5.60.5_nn1.n4_3_fold,
  probLeft / probRight
)
bst_tda_kde_5.60.5_nn1.n4_3_fold_odds.left
## [1] Inf
# Bayesian Signed Rank Test on the fold-wise differences, using the same
# -0.01 / 0.01 bounds as the sign test
bsr_tda_kde_5.60.5_nn1.n4_3_fold <- BayesianSignedRank(
  as.matrix(diff_tda_kde_5.60.5_nn1_n4_3_fold),
  -0.01,
  0.01
)
bsr_tda_kde_5.60.5_nn1.n4_3_fold
## $winLeft
## [1] 0.7835333
##
## $winRope
## [1] 0.2164667
##
## $winRight
## [1] 0
# Bayesian Correlated t-test on the fold-wise differences.
# NOTE(review): the second argument (0.1) is presumably the assumed
# correlation between folds -- confirm against the function definition.
bct_tda_kde_5.60.5_nn1.n4_3_fold <- correlatedBayesianTtest(
  as.matrix(diff_tda_kde_5.60.5_nn1_n4_3_fold),
  0.1,
  -0.01,
  0.01
)
bct_tda_kde_5.60.5_nn1.n4_3_fold
## $left
## [1] 0.7528292
##
## $rope
## [1] 0.1636701
##
## $right
## [1] 0.08350075
# ROPE plot of the fold-wise differences over the interval [-0.01, 0.01]
plot(
  rope(diff_tda_kde_5.60.5_nn1_n4_3_fold, range = c(-0.01, 0.01))
)

#BayesFactor
#bf_tda_kde_5.60.5_nn1.n4_3_fold = ttestBF(x = as.matrix(diff_tda_kde_5.60.5_nn1_n4_3_fold))
#bf_tda_kde_5.60.5_nn1.n4_3_fold
# One-sample t-test of the fold-wise differences against a mean of 0
t.test(as.matrix(diff_tda_kde_5.60.5_nn1_n4_3_fold))
##
## One Sample t-test
##
## data: as.matrix(diff_tda_kde_5.60.5_nn1_n4_3_fold)
## t = -1.7079, df = 2, p-value = 0.2298
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
## -0.08006110 0.03456316
## sample estimates:
## mean of x
## -0.02274897
### Test set diff
# Test-set accuracy difference: `nn1_cf_ov_acc` (defined earlier in the file)
# minus the TDA-assisted NN1 accuracy; this is a single value
diff_tda_kde_5.60.5_nn1.n4_test <-
  nn1_cf_ov_acc - ad_tda_kde_5.60.5_n4_nn1_cf0_ov_acc
diff_tda_kde_5.60.5_nn1.n4_test
## Accuracy
## 0.004402129
## Bayesian tests on the test-set difference
# Bayesian Sign Test with the same -0.01 / 0.01 bounds as the 3-fold tests
bst_tda_kde_5.60.5_nn1.n4_test <- BayesianSignTest(
  as.matrix(diff_tda_kde_5.60.5_nn1.n4_test),
  -0.01,
  0.01
)
bst_tda_kde_5.60.5_nn1.n4_test
## $probLeft
## [1] 0
##
## $probRope
## [1] 1
##
## $probRight
## [1] 0
# Odds Left Bayesian Sign Test: probLeft divided by probRight.
# When both probabilities are 0 the ratio is 0/0, i.e. NaN.
bst_tda_kde_5.60.5_nn1.n4_test_odds.left <- with(
  bst_tda_kde_5.60.5_nn1.n4_test,
  probLeft / probRight
)
bst_tda_kde_5.60.5_nn1.n4_test_odds.left
## [1] NaN
# Bayesian Signed Rank Test on the single test-set difference
bsr_tda_kde_5.60.5_nn1.n4_test <- BayesianSignedRank(
  as.matrix(diff_tda_kde_5.60.5_nn1.n4_test),
  -0.01,
  0.01
)
bsr_tda_kde_5.60.5_nn1.n4_test
## $winLeft
## [1] 0
##
## $winRope
## [1] 1
##
## $winRight
## [1] 0
# Bayesian Correlated t-test on the single test-set difference.
# NOTE(review): the printed result is NA for left, rope and right --
# presumably because only one difference is supplied; confirm.
bct_tda_kde_5.60.5_nn1.n4_test <- correlatedBayesianTtest(
  as.matrix(diff_tda_kde_5.60.5_nn1.n4_test),
  0.1,
  -0.01,
  0.01
)
bct_tda_kde_5.60.5_nn1.n4_test
## $left
## [1] NA
##
## $rope
## [1] NA
##
## $right
## [1] NA
# Rope Plot
#plot(rope(diff_tda_kde_5.60.5_nn1.n4_test, c(-0.01,0.01)))
#BayesFactor
#bf_tda_kde_5.60.5_nn1.n4_test = ttestBF(x = as.matrix(diff_tda_kde_5.60.5_nn1.n4_test))
#bf_tda_kde_5.60.5_nn1.n4_test
#t_test
#t.test(as.matrix(diff_tda_kde_5.60.5_nn1.n4_test))
##Node5
#Neural Network 1
# Fit a neural network (caret method "nnet") on the node-5 data, tuning over
# `nn1Grid` with the resampling scheme in `fitControl` and selecting the
# model by accuracy.
# `TRUE` replaces `T`, which is an ordinary variable that can be reassigned.
# NOTE(review): `Importance` is only forwarded through `...`; confirm it has
# any effect for method = "nnet".
Adult_TDA_KDE_5.60.5_n5_NN1Fit0 <- train(
  as.factor(adult_df1) ~ .,
  data = tda.m_kde_adult_5.60.5.n5.vec,
  Importance = TRUE,
  method = "nnet",
  trControl = fitControl,
  tuneGrid = nn1Grid,
  metric = "Accuracy"
)
## # weights: 221
## initial value 3832.727811
## iter 10 value 2564.310250
## iter 20 value 2516.118965
## iter 30 value 2475.935520
## iter 40 value 2404.990071
## iter 50 value 2372.575012
## iter 60 value 2367.968005
## iter 70 value 2322.722369
## iter 80 value 2212.547266
## iter 90 value 2088.824549
## iter 100 value 1825.115690
## final value 1825.115690
## stopped after 100 iterations
## # weights: 331
## initial value 2703.389364
## iter 10 value 2564.221496
## iter 20 value 2559.804802
## iter 30 value 2411.445858
## iter 40 value 2385.896403
## iter 50 value 2379.206664
## iter 60 value 2365.512802
## iter 70 value 2356.540036
## iter 80 value 2173.303728
## iter 90 value 1856.810119
## iter 100 value 1739.576524
## final value 1739.576524
## stopped after 100 iterations
## # weights: 551
## initial value 2793.244928
## iter 10 value 2564.096400
## iter 20 value 2523.084205
## iter 30 value 2383.554163
## iter 40 value 2370.279531
## iter 50 value 2368.591185
## iter 60 value 2361.753001
## iter 70 value 2348.035615
## iter 80 value 2298.216706
## iter 90 value 2229.921359
## iter 100 value 2113.488105
## final value 2113.488105
## stopped after 100 iterations
## # weights: 771
## initial value 3688.374704
## iter 10 value 2546.707711
## iter 20 value 2364.509802
## iter 30 value 2319.807610
## iter 40 value 2263.335921
## iter 50 value 2154.009726
## iter 60 value 1771.578894
## iter 70 value 1662.348358
## iter 80 value 1620.225854
## iter 90 value 1604.021816
## iter 100 value 1590.017819
## final value 1590.017819
## stopped after 100 iterations
## # weights: 221
## initial value 5826.627581
## iter 10 value 2552.330472
## iter 20 value 2407.339451
## iter 30 value 2346.661684
## iter 40 value 2338.248071
## iter 50 value 2240.339117
## iter 60 value 1955.390891
## iter 70 value 1762.235280
## iter 80 value 1656.887573
## iter 90 value 1602.736186
## iter 100 value 1589.830429
## final value 1589.830429
## stopped after 100 iterations
## # weights: 331
## initial value 7226.566088
## iter 10 value 2561.925287
## iter 20 value 2400.723479
## iter 30 value 2397.826297
## iter 40 value 2397.154362
## iter 50 value 2396.238741
## iter 60 value 2390.722635
## iter 70 value 2388.181744
## iter 80 value 2388.153327
## iter 80 value 2388.153325
## final value 2388.153299
## converged
## # weights: 551
## initial value 2705.545308
## iter 10 value 2425.622727
## iter 20 value 2397.368795
## iter 30 value 2397.074456
## iter 40 value 2364.230838
## iter 50 value 2360.834221
## iter 60 value 2259.574561
## iter 70 value 1996.706693
## iter 80 value 1861.300070
## iter 90 value 1801.442000
## iter 100 value 1762.345291
## final value 1762.345291
## stopped after 100 iterations
## # weights: 771
## initial value 7437.506588
## iter 10 value 2552.328232
## iter 20 value 2372.143916
## iter 30 value 2250.951568
## iter 40 value 2080.374070
## iter 50 value 2071.716189
## iter 60 value 1817.124230
## iter 70 value 1732.273522
## iter 80 value 1640.969745
## iter 90 value 1629.630393
## iter 100 value 1587.105265
## final value 1587.105265
## stopped after 100 iterations
## # weights: 221
## initial value 2873.862304
## iter 10 value 2519.553020
## iter 20 value 2406.824068
## iter 30 value 2399.183354
## iter 40 value 2378.168235
## iter 50 value 2107.998064
## iter 60 value 1883.674033
## iter 70 value 1783.465410
## iter 80 value 1753.758637
## iter 90 value 1696.683696
## iter 100 value 1638.035875
## final value 1638.035875
## stopped after 100 iterations
## # weights: 331
## initial value 3197.929095
## iter 10 value 2419.936176
## iter 20 value 2400.542767
## iter 30 value 2392.547347
## iter 40 value 2391.412604
## iter 50 value 2390.394869
## iter 60 value 2335.777613
## iter 70 value 2257.747147
## iter 80 value 2178.120718
## iter 90 value 1817.255441
## iter 100 value 1738.742349
## final value 1738.742349
## stopped after 100 iterations
## # weights: 551
## initial value 5209.785239
## iter 10 value 2564.109830
## iter 20 value 2509.242501
## iter 30 value 2356.965339
## iter 40 value 2349.624596
## iter 50 value 2342.573014
## iter 60 value 2336.875147
## iter 70 value 2003.949480
## iter 80 value 1875.510520
## iter 90 value 1800.224539
## iter 100 value 1680.419436
## final value 1680.419436
## stopped after 100 iterations
## # weights: 771
## initial value 2700.837876
## iter 10 value 2520.552416
## iter 20 value 2472.698789
## iter 30 value 2417.170080
## iter 40 value 2323.011005
## iter 50 value 2295.138509
## iter 60 value 2175.422066
## iter 70 value 2153.066059
## iter 80 value 1967.050792
## iter 90 value 1707.785679
## iter 100 value 1682.667698
## final value 1682.667698
## stopped after 100 iterations
## # weights: 221
## initial value 4556.577192
## final value 2564.438353
## converged
## # weights: 331
## initial value 4197.702688
## iter 10 value 2470.900844
## iter 20 value 2388.650297
## final value 2353.753615
## converged
## # weights: 551
## initial value 6616.076159
## iter 10 value 2564.221494
## final value 2564.221438
## converged
## # weights: 771
## initial value 2948.858342
## iter 10 value 2529.971796
## iter 20 value 2375.511579
## iter 30 value 2344.357552
## iter 40 value 2338.294327
## iter 50 value 2231.809890
## iter 60 value 2088.607450
## iter 70 value 1969.780668
## iter 80 value 1833.983472
## iter 90 value 1731.200582
## iter 100 value 1624.994021
## final value 1624.994021
## stopped after 100 iterations
## # weights: 221
## initial value 4850.067606
## iter 10 value 2567.914909
## iter 20 value 2564.765927
## iter 30 value 2564.727648
## iter 40 value 2508.980634
## iter 50 value 2439.105745
## iter 60 value 2439.045829
## iter 70 value 2397.157773
## iter 80 value 2388.439861
## iter 90 value 2385.118470
## iter 100 value 2379.069785
## final value 2379.069785
## stopped after 100 iterations
## # weights: 331
## initial value 6617.532237
## iter 10 value 2553.667477
## iter 20 value 2542.751364
## iter 30 value 2391.201696
## iter 40 value 2385.883431
## iter 50 value 2380.021530
## iter 60 value 2350.266668
## iter 70 value 2341.836919
## iter 80 value 2313.973960
## iter 90 value 2243.170364
## iter 100 value 2126.031763
## final value 2126.031763
## stopped after 100 iterations
## # weights: 551
## initial value 6366.261703
## iter 10 value 2574.035660
## iter 20 value 2567.653201
## iter 30 value 2553.074086
## iter 40 value 2483.349960
## iter 50 value 2401.854394
## iter 60 value 2387.350490
## iter 70 value 2386.586362
## iter 80 value 2385.213577
## iter 90 value 2385.113710
## iter 100 value 2385.107900
## final value 2385.107900
## stopped after 100 iterations
## # weights: 771
## initial value 3079.206095
## iter 10 value 2462.033309
## iter 20 value 2375.769261
## iter 30 value 2357.094768
## iter 40 value 2346.654017
## iter 50 value 2342.650084
## iter 60 value 2325.223074
## iter 70 value 2304.810168
## iter 80 value 2156.372249
## iter 90 value 1901.890183
## iter 100 value 1811.752571
## final value 1811.752571
## stopped after 100 iterations
## # weights: 221
## initial value 3516.600127
## iter 10 value 2555.981337
## iter 20 value 2389.248524
## iter 30 value 2369.298328
## iter 40 value 2338.966376
## iter 50 value 2244.293065
## iter 60 value 1884.550963
## iter 70 value 1738.048126
## iter 80 value 1643.445959
## iter 90 value 1628.319627
## iter 100 value 1624.667813
## final value 1624.667813
## stopped after 100 iterations
## # weights: 331
## initial value 5438.767376
## iter 10 value 2557.404484
## iter 20 value 2430.824001
## iter 30 value 2344.072029
## iter 40 value 2282.868879
## iter 50 value 2107.776216
## iter 60 value 1922.868554
## iter 70 value 1816.383589
## iter 80 value 1722.019789
## iter 90 value 1659.144892
## iter 100 value 1647.958271
## final value 1647.958271
## stopped after 100 iterations
## # weights: 551
## initial value 3601.858793
## iter 10 value 2568.615797
## iter 20 value 2565.058976
## iter 30 value 2565.016849
## final value 2565.016147
## converged
## # weights: 771
## initial value 7267.488662
## iter 10 value 2452.100877
## iter 20 value 2384.423671
## iter 30 value 2360.747222
## iter 40 value 2342.020281
## iter 50 value 2317.176913
## iter 60 value 2102.612601
## iter 70 value 2048.356862
## iter 80 value 1856.534069
## iter 90 value 1738.548655
## iter 100 value 1692.650427
## final value 1692.650427
## stopped after 100 iterations
## # weights: 221
## initial value 3399.647946
## iter 10 value 2497.312703
## iter 20 value 2376.880990
## iter 30 value 2343.044755
## iter 40 value 2146.683620
## iter 50 value 1827.372332
## iter 60 value 1768.648830
## iter 70 value 1733.868905
## iter 80 value 1689.599578
## iter 90 value 1656.559301
## iter 100 value 1629.993888
## final value 1629.993888
## stopped after 100 iterations
## # weights: 331
## initial value 3930.764042
## iter 10 value 2552.909953
## iter 20 value 2376.979030
## iter 30 value 2345.274266
## iter 40 value 2307.668222
## iter 50 value 1970.558030
## iter 60 value 1851.521300
## iter 70 value 1836.050868
## iter 80 value 1813.762086
## iter 90 value 1805.144220
## iter 100 value 1729.338864
## final value 1729.338864
## stopped after 100 iterations
## # weights: 551
## initial value 4191.956408
## iter 10 value 2390.501717
## iter 20 value 2367.617275
## iter 30 value 2248.483308
## iter 40 value 2135.640326
## iter 50 value 1833.965293
## iter 60 value 1681.148291
## iter 70 value 1654.805708
## iter 80 value 1624.432064
## iter 90 value 1606.469224
## iter 100 value 1604.423763
## final value 1604.423763
## stopped after 100 iterations
## # weights: 771
## initial value 3346.707581
## iter 10 value 2382.197544
## iter 20 value 2373.366687
## iter 30 value 2343.656505
## iter 40 value 2302.167935
## iter 50 value 2281.306619
## iter 60 value 2243.369266
## iter 70 value 1841.459603
## iter 80 value 1745.231063
## iter 90 value 1671.109794
## iter 100 value 1649.230432
## final value 1649.230432
## stopped after 100 iterations
## # weights: 221
## initial value 4245.111206
## iter 10 value 2560.762130
## iter 20 value 2381.422780
## iter 30 value 2380.093593
## iter 40 value 2380.069384
## iter 50 value 2379.159996
## iter 60 value 2374.015396
## iter 70 value 2371.720807
## final value 2371.677109
## converged
## # weights: 331
## initial value 6190.790887
## iter 10 value 2551.796125
## iter 20 value 2359.601200
## iter 30 value 2342.397432
## iter 40 value 2271.677575
## iter 50 value 1986.590424
## iter 60 value 1777.770018
## iter 70 value 1689.919743
## iter 80 value 1663.812898
## iter 90 value 1614.655194
## iter 100 value 1604.839477
## final value 1604.839477
## stopped after 100 iterations
## # weights: 551
## initial value 4104.439054
## iter 10 value 2566.199351
## iter 20 value 2564.710453
## iter 30 value 2564.364703
## iter 40 value 2564.347843
## iter 50 value 2454.282285
## iter 60 value 2377.284899
## iter 70 value 2372.613549
## iter 80 value 2372.345400
## iter 90 value 2360.266396
## iter 100 value 2281.778536
## final value 2281.778536
## stopped after 100 iterations
## # weights: 771
## initial value 4746.647756
## iter 10 value 2567.878485
## iter 20 value 2563.554181
## iter 30 value 2448.280775
## iter 40 value 2386.549955
## iter 50 value 2365.083308
## iter 60 value 2362.227628
## iter 70 value 2359.884923
## iter 80 value 2337.979875
## iter 90 value 2318.634065
## iter 100 value 2237.058509
## final value 2237.058509
## stopped after 100 iterations
## # weights: 221
## initial value 5967.236747
## iter 10 value 2564.724984
## iter 20 value 2459.846500
## iter 30 value 2419.213650
## iter 40 value 2396.402868
## iter 50 value 2388.123379
## iter 60 value 2382.962916
## iter 70 value 2354.090712
## iter 80 value 2347.449266
## iter 90 value 2345.788778
## iter 100 value 2344.263885
## final value 2344.263885
## stopped after 100 iterations
## # weights: 331
## initial value 4379.902355
## iter 10 value 2568.432841
## iter 20 value 2565.057201
## iter 30 value 2565.016499
## final value 2565.016029
## converged
## # weights: 551
## initial value 5106.974462
## iter 10 value 2564.509653
## iter 20 value 2518.402045
## iter 30 value 2514.583452
## iter 40 value 2396.673139
## iter 50 value 2292.443606
## iter 60 value 2000.917904
## iter 70 value 1870.571050
## iter 80 value 1801.725193
## iter 90 value 1775.717203
## iter 100 value 1762.646135
## final value 1762.646135
## stopped after 100 iterations
## # weights: 771
## initial value 10440.989925
## iter 10 value 2577.280097
## iter 20 value 2566.707260
## iter 30 value 2566.563859
## iter 40 value 2540.606789
## iter 50 value 2391.629931
## iter 60 value 2379.208735
## iter 70 value 2375.705543
## iter 80 value 2374.971410
## iter 90 value 2374.933539
## iter 100 value 2374.827884
## final value 2374.827884
## stopped after 100 iterations
## # weights: 771
## initial value 4015.595098
## iter 10 value 3605.651894
## iter 20 value 3555.159572
## iter 30 value 3483.494918
## iter 40 value 3383.240517
## iter 50 value 3040.171039
## iter 60 value 2746.060329
## iter 70 value 2633.033231
## iter 80 value 2564.219021
## iter 90 value 2444.952872
## iter 100 value 2394.871140
## final value 2394.871140
## stopped after 100 iterations
# Print the fitted model object: the recorded output lists the 3-fold
# cross-validated accuracy and kappa for every size/decay pair and the
# values selected for the final network.
Adult_TDA_KDE_5.60.5_n5_NN1Fit0
## Neural Network
##
## 8940 samples
## 108 predictor
## 2 classes: ' <=50K', ' >50K'
##
## No pre-processing
## Resampling: Cross-Validated (3 fold)
## Summary of sample sizes: 5960, 5960, 5960
## Resampling results across tuning parameters:
##
## size decay Accuracy Kappa
## 2 0.3 0.8597315 0.2122316
## 2 0.5 0.8664430 0.2608848
## 2 0.7 0.8655481 0.2732793
## 3 0.3 0.8647651 0.2681231
## 3 0.5 0.8665548 0.2670302
## 3 0.7 0.8583893 0.1985636
## 5 0.3 0.8533557 0.1406378
## 5 0.5 0.8583893 0.2499608
## 5 0.7 0.8606264 0.2497128
## 7 0.3 0.8684564 0.3465052
## 7 0.5 0.8667785 0.2494509
## 7 0.7 0.8653244 0.2962923
##
## Accuracy was used to select the optimal model using the largest value.
## The final values used for the model were size = 7 and decay = 0.3.
# Per-fold accuracy and kappa of the selected model.
Adult_TDA_KDE_5.60.5_n5_NN1Fit0[["resample"]]
## Accuracy Kappa Resample
## 1 0.8681208 0.2562963 Fold2
## 2 0.8731544 0.4324324 Fold1
## 3 0.8640940 0.3507870 Fold3
# Keep only the first column (per-fold accuracy) as a one-column data frame;
# it is subtracted from the baseline folds further down.
ad_tda_kde_5.60.5_n5_nn1_fit_re <- Adult_TDA_KDE_5.60.5_n5_NN1Fit0[["resample"]][1]
# Show the architecture and weights of the final network.
summary(Adult_TDA_KDE_5.60.5_n5_NN1Fit0)
## a 108-7-1 network with 771 weights
## options were - entropy fitting decay=0.3
## b->h1 i1->h1 i2->h1 i3->h1 i4->h1 i5->h1 i6->h1 i7->h1
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i8->h1 i9->h1 i10->h1 i11->h1 i12->h1 i13->h1 i14->h1 i15->h1
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i16->h1 i17->h1 i18->h1 i19->h1 i20->h1 i21->h1 i22->h1 i23->h1
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i24->h1 i25->h1 i26->h1 i27->h1 i28->h1 i29->h1 i30->h1 i31->h1
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i32->h1 i33->h1 i34->h1 i35->h1 i36->h1 i37->h1 i38->h1 i39->h1
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i40->h1 i41->h1 i42->h1 i43->h1 i44->h1 i45->h1 i46->h1 i47->h1
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i48->h1 i49->h1 i50->h1 i51->h1 i52->h1 i53->h1 i54->h1 i55->h1
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i56->h1 i57->h1 i58->h1 i59->h1 i60->h1 i61->h1 i62->h1 i63->h1
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i64->h1 i65->h1 i66->h1 i67->h1 i68->h1 i69->h1 i70->h1 i71->h1
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i72->h1 i73->h1 i74->h1 i75->h1 i76->h1 i77->h1 i78->h1 i79->h1
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i80->h1 i81->h1 i82->h1 i83->h1 i84->h1 i85->h1 i86->h1 i87->h1
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i88->h1 i89->h1 i90->h1 i91->h1 i92->h1 i93->h1 i94->h1 i95->h1
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i96->h1 i97->h1 i98->h1 i99->h1 i100->h1 i101->h1 i102->h1 i103->h1
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i104->h1 i105->h1 i106->h1 i107->h1 i108->h1
## 0.00 0.00 0.00 0.00 0.00
## b->h2 i1->h2 i2->h2 i3->h2 i4->h2 i5->h2 i6->h2 i7->h2
## -0.02 -0.11 0.04 -0.02 -0.31 0.00 0.08 0.14
## i8->h2 i9->h2 i10->h2 i11->h2 i12->h2 i13->h2 i14->h2 i15->h2
## -0.01 0.05 0.00 0.00 0.00 0.00 0.00 0.00
## i16->h2 i17->h2 i18->h2 i19->h2 i20->h2 i21->h2 i22->h2 i23->h2
## 0.00 0.00 0.00 0.00 0.18 0.00 0.00 -0.01
## i24->h2 i25->h2 i26->h2 i27->h2 i28->h2 i29->h2 i30->h2 i31->h2
## 0.00 0.00 0.00 -0.19 -0.02 -0.19 0.00 0.09
## i32->h2 i33->h2 i34->h2 i35->h2 i36->h2 i37->h2 i38->h2 i39->h2
## 0.00 -0.05 0.12 0.02 0.04 -0.23 0.00 0.13
## i40->h2 i41->h2 i42->h2 i43->h2 i44->h2 i45->h2 i46->h2 i47->h2
## -0.29 0.00 -0.03 0.08 0.00 0.00 0.10 0.01
## i48->h2 i49->h2 i50->h2 i51->h2 i52->h2 i53->h2 i54->h2 i55->h2
## 0.34 -0.10 -0.07 0.20 -0.14 0.04 -0.01 0.03
## i56->h2 i57->h2 i58->h2 i59->h2 i60->h2 i61->h2 i62->h2 i63->h2
## -0.14 -0.01 0.08 -0.03 -0.02 -0.04 -0.12 0.09
## i64->h2 i65->h2 i66->h2 i67->h2 i68->h2 i69->h2 i70->h2 i71->h2
## 0.02 0.04 -0.40 0.06 0.00 -0.01 0.00 0.00
## i72->h2 i73->h2 i74->h2 i75->h2 i76->h2 i77->h2 i78->h2 i79->h2
## 0.00 0.00 0.00 0.00 0.01 0.00 0.01 -0.03
## i80->h2 i81->h2 i82->h2 i83->h2 i84->h2 i85->h2 i86->h2 i87->h2
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i88->h2 i89->h2 i90->h2 i91->h2 i92->h2 i93->h2 i94->h2 i95->h2
## 0.00 0.00 0.00 0.00 0.00 -0.02 0.00 0.00
## i96->h2 i97->h2 i98->h2 i99->h2 i100->h2 i101->h2 i102->h2 i103->h2
## 0.00 0.00 0.02 0.00 0.00 0.00 0.00 0.00
## i104->h2 i105->h2 i106->h2 i107->h2 i108->h2
## 0.00 0.00 -0.05 0.00 0.00
## b->h3 i1->h3 i2->h3 i3->h3 i4->h3 i5->h3 i6->h3 i7->h3
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i8->h3 i9->h3 i10->h3 i11->h3 i12->h3 i13->h3 i14->h3 i15->h3
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i16->h3 i17->h3 i18->h3 i19->h3 i20->h3 i21->h3 i22->h3 i23->h3
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i24->h3 i25->h3 i26->h3 i27->h3 i28->h3 i29->h3 i30->h3 i31->h3
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i32->h3 i33->h3 i34->h3 i35->h3 i36->h3 i37->h3 i38->h3 i39->h3
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i40->h3 i41->h3 i42->h3 i43->h3 i44->h3 i45->h3 i46->h3 i47->h3
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i48->h3 i49->h3 i50->h3 i51->h3 i52->h3 i53->h3 i54->h3 i55->h3
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i56->h3 i57->h3 i58->h3 i59->h3 i60->h3 i61->h3 i62->h3 i63->h3
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i64->h3 i65->h3 i66->h3 i67->h3 i68->h3 i69->h3 i70->h3 i71->h3
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i72->h3 i73->h3 i74->h3 i75->h3 i76->h3 i77->h3 i78->h3 i79->h3
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i80->h3 i81->h3 i82->h3 i83->h3 i84->h3 i85->h3 i86->h3 i87->h3
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i88->h3 i89->h3 i90->h3 i91->h3 i92->h3 i93->h3 i94->h3 i95->h3
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i96->h3 i97->h3 i98->h3 i99->h3 i100->h3 i101->h3 i102->h3 i103->h3
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i104->h3 i105->h3 i106->h3 i107->h3 i108->h3
## 0.00 0.00 0.00 0.00 0.00
## b->h4 i1->h4 i2->h4 i3->h4 i4->h4 i5->h4 i6->h4 i7->h4
## 0.35 -0.05 0.15 -0.70 -0.13 0.20 0.03 -0.08
## i8->h4 i9->h4 i10->h4 i11->h4 i12->h4 i13->h4 i14->h4 i15->h4
## 0.33 0.24 0.31 0.00 0.00 0.00 0.84 0.00
## i16->h4 i17->h4 i18->h4 i19->h4 i20->h4 i21->h4 i22->h4 i23->h4
## 0.00 0.00 0.00 0.00 -0.90 0.00 0.00 0.53
## i24->h4 i25->h4 i26->h4 i27->h4 i28->h4 i29->h4 i30->h4 i31->h4
## 0.00 0.00 0.00 -0.12 0.42 0.51 -1.28 -0.88
## i32->h4 i33->h4 i34->h4 i35->h4 i36->h4 i37->h4 i38->h4 i39->h4
## 0.74 0.78 0.49 0.00 0.35 -0.21 0.38 -0.06
## i40->h4 i41->h4 i42->h4 i43->h4 i44->h4 i45->h4 i46->h4 i47->h4
## -0.54 0.39 0.40 -0.03 0.41 0.96 -0.63 -0.40
## i48->h4 i49->h4 i50->h4 i51->h4 i52->h4 i53->h4 i54->h4 i55->h4
## 0.01 -0.85 0.16 0.06 -0.05 0.42 0.62 0.48
## i56->h4 i57->h4 i58->h4 i59->h4 i60->h4 i61->h4 i62->h4 i63->h4
## -1.18 0.82 -0.63 -0.04 0.47 -0.26 0.55 -0.20
## i64->h4 i65->h4 i66->h4 i67->h4 i68->h4 i69->h4 i70->h4 i71->h4
## 0.00 0.00 -0.03 -0.31 -0.49 -0.58 -0.97 0.75
## i72->h4 i73->h4 i74->h4 i75->h4 i76->h4 i77->h4 i78->h4 i79->h4
## -1.51 0.88 -0.39 0.48 -0.08 0.04 -0.57 0.75
## i80->h4 i81->h4 i82->h4 i83->h4 i84->h4 i85->h4 i86->h4 i87->h4
## -0.34 0.42 0.00 0.08 0.71 0.49 0.60 -0.91
## i88->h4 i89->h4 i90->h4 i91->h4 i92->h4 i93->h4 i94->h4 i95->h4
## -0.58 0.36 -1.06 0.94 0.62 0.11 0.62 0.16
## i96->h4 i97->h4 i98->h4 i99->h4 i100->h4 i101->h4 i102->h4 i103->h4
## 0.63 -0.61 0.12 -0.06 0.19 0.58 -0.35 0.36
## i104->h4 i105->h4 i106->h4 i107->h4 i108->h4
## 0.33 0.67 -0.52 -0.09 -1.09
## b->h5 i1->h5 i2->h5 i3->h5 i4->h5 i5->h5 i6->h5 i7->h5
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i8->h5 i9->h5 i10->h5 i11->h5 i12->h5 i13->h5 i14->h5 i15->h5
## 0.00 0.00 0.00 -0.01 0.00 0.00 0.00 0.00
## i16->h5 i17->h5 i18->h5 i19->h5 i20->h5 i21->h5 i22->h5 i23->h5
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i24->h5 i25->h5 i26->h5 i27->h5 i28->h5 i29->h5 i30->h5 i31->h5
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i32->h5 i33->h5 i34->h5 i35->h5 i36->h5 i37->h5 i38->h5 i39->h5
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i40->h5 i41->h5 i42->h5 i43->h5 i44->h5 i45->h5 i46->h5 i47->h5
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i48->h5 i49->h5 i50->h5 i51->h5 i52->h5 i53->h5 i54->h5 i55->h5
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i56->h5 i57->h5 i58->h5 i59->h5 i60->h5 i61->h5 i62->h5 i63->h5
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i64->h5 i65->h5 i66->h5 i67->h5 i68->h5 i69->h5 i70->h5 i71->h5
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i72->h5 i73->h5 i74->h5 i75->h5 i76->h5 i77->h5 i78->h5 i79->h5
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i80->h5 i81->h5 i82->h5 i83->h5 i84->h5 i85->h5 i86->h5 i87->h5
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i88->h5 i89->h5 i90->h5 i91->h5 i92->h5 i93->h5 i94->h5 i95->h5
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i96->h5 i97->h5 i98->h5 i99->h5 i100->h5 i101->h5 i102->h5 i103->h5
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i104->h5 i105->h5 i106->h5 i107->h5 i108->h5
## 0.00 0.00 0.00 0.00 0.00
## b->h6 i1->h6 i2->h6 i3->h6 i4->h6 i5->h6 i6->h6 i7->h6
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i8->h6 i9->h6 i10->h6 i11->h6 i12->h6 i13->h6 i14->h6 i15->h6
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i16->h6 i17->h6 i18->h6 i19->h6 i20->h6 i21->h6 i22->h6 i23->h6
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i24->h6 i25->h6 i26->h6 i27->h6 i28->h6 i29->h6 i30->h6 i31->h6
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i32->h6 i33->h6 i34->h6 i35->h6 i36->h6 i37->h6 i38->h6 i39->h6
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i40->h6 i41->h6 i42->h6 i43->h6 i44->h6 i45->h6 i46->h6 i47->h6
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i48->h6 i49->h6 i50->h6 i51->h6 i52->h6 i53->h6 i54->h6 i55->h6
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i56->h6 i57->h6 i58->h6 i59->h6 i60->h6 i61->h6 i62->h6 i63->h6
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i64->h6 i65->h6 i66->h6 i67->h6 i68->h6 i69->h6 i70->h6 i71->h6
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i72->h6 i73->h6 i74->h6 i75->h6 i76->h6 i77->h6 i78->h6 i79->h6
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i80->h6 i81->h6 i82->h6 i83->h6 i84->h6 i85->h6 i86->h6 i87->h6
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i88->h6 i89->h6 i90->h6 i91->h6 i92->h6 i93->h6 i94->h6 i95->h6
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i96->h6 i97->h6 i98->h6 i99->h6 i100->h6 i101->h6 i102->h6 i103->h6
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i104->h6 i105->h6 i106->h6 i107->h6 i108->h6
## 0.00 0.00 0.00 0.00 0.00
## b->h7 i1->h7 i2->h7 i3->h7 i4->h7 i5->h7 i6->h7 i7->h7
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i8->h7 i9->h7 i10->h7 i11->h7 i12->h7 i13->h7 i14->h7 i15->h7
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i16->h7 i17->h7 i18->h7 i19->h7 i20->h7 i21->h7 i22->h7 i23->h7
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i24->h7 i25->h7 i26->h7 i27->h7 i28->h7 i29->h7 i30->h7 i31->h7
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i32->h7 i33->h7 i34->h7 i35->h7 i36->h7 i37->h7 i38->h7 i39->h7
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i40->h7 i41->h7 i42->h7 i43->h7 i44->h7 i45->h7 i46->h7 i47->h7
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i48->h7 i49->h7 i50->h7 i51->h7 i52->h7 i53->h7 i54->h7 i55->h7
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i56->h7 i57->h7 i58->h7 i59->h7 i60->h7 i61->h7 i62->h7 i63->h7
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i64->h7 i65->h7 i66->h7 i67->h7 i68->h7 i69->h7 i70->h7 i71->h7
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i72->h7 i73->h7 i74->h7 i75->h7 i76->h7 i77->h7 i78->h7 i79->h7
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i80->h7 i81->h7 i82->h7 i83->h7 i84->h7 i85->h7 i86->h7 i87->h7
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i88->h7 i89->h7 i90->h7 i91->h7 i92->h7 i93->h7 i94->h7 i95->h7
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i96->h7 i97->h7 i98->h7 i99->h7 i100->h7 i101->h7 i102->h7 i103->h7
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i104->h7 i105->h7 i106->h7 i107->h7 i108->h7
## 0.00 0.00 0.00 0.00 0.00
## b->o h1->o h2->o h3->o h4->o h5->o h6->o h7->o
## 0.40 0.00 1.54 0.37 -6.40 0.18 0.40 0.41
# Variable-importance plot for the TDA-assisted neural network; 50 is passed
# positionally as vip's second argument.
# Title typo fixed ("Assited" -> "Assisted") so it matches the other plots.
vip(Adult_TDA_KDE_5.60.5_n5_NN1Fit0, 50) +
  ggtitle("Adult_TDA_KDE_5.60.5_n5_NN1Fit TDA-Assisted NN")

# Score the held-out test data with the model fitted on the training data.
pred0 <- predict(
  Adult_TDA_KDE_5.60.5_n5_NN1Fit0,
  newdata = adult.one_hot_df4Test
)
# Cross-tabulate the predictions against the observed test labels to assess
# model performance, then print the result.
ad_tda_kde_5.60.5_n5_nn1_cf0 <- confusionMatrix(
  data = pred0,
  reference = as.factor(adult.one_hot_df4Test$adult_df1)
)
ad_tda_kde_5.60.5_n5_nn1_cf0
## Confusion Matrix and Statistics
##
## Reference
## Prediction <=50K >50K
## <=50K 6416 1459
## >50K 1000 893
##
## Accuracy : 0.7483
## 95% CI : (0.7395, 0.7568)
## No Information Rate : 0.7592
## P-Value [Acc > NIR] : 0.9943
##
## Kappa : 0.2623
##
## Mcnemar's Test P-Value : <2e-16
##
## Sensitivity : 0.8652
## Specificity : 0.3797
## Pos Pred Value : 0.8147
## Neg Pred Value : 0.4717
## Prevalence : 0.7592
## Detection Rate : 0.6568
## Detection Prevalence : 0.8062
## Balanced Accuracy : 0.6224
##
## 'Positive' Class : <=50K
##
# Second print of the same confusion-matrix object; its output repeats the
# block printed immediately before it.
ad_tda_kde_5.60.5_n5_nn1_cf0
## Confusion Matrix and Statistics
##
## Reference
## Prediction <=50K >50K
## <=50K 6416 1459
## >50K 1000 893
##
## Accuracy : 0.7483
## 95% CI : (0.7395, 0.7568)
## No Information Rate : 0.7592
## P-Value [Acc > NIR] : 0.9943
##
## Kappa : 0.2623
##
## Mcnemar's Test P-Value : <2e-16
##
## Sensitivity : 0.8652
## Specificity : 0.3797
## Pos Pred Value : 0.8147
## Neg Pred Value : 0.4717
## Prevalence : 0.7592
## Detection Rate : 0.6568
## Detection Prevalence : 0.8062
## Balanced Accuracy : 0.6224
##
## 'Positive' Class : <=50K
##
# Overall test-set statistics (accuracy, kappa, accuracy bounds, p-values).
ad_tda_kde_5.60.5_n5_nn1_cf0[["overall"]]
## Accuracy Kappa AccuracyLower AccuracyUpper AccuracyNull
## 7.482596e-01 2.623111e-01 7.395283e-01 7.568427e-01 7.592138e-01
## AccuracyPValue McnemarPValue
## 9.943459e-01 2.557681e-20
# First element of the overall vector, i.e. the test-set accuracy.
ad_tda_kde_5.60.5_n5_nn1_cf0_ov_acc <- ad_tda_kde_5.60.5_n5_nn1_cf0[["overall"]][1]
# Per-class statistics for the positive class.
ad_tda_kde_5.60.5_n5_nn1_cf0[["byClass"]]
## Sensitivity Specificity Pos Pred Value
## 0.8651564 0.3796769 0.8147302
## Neg Pred Value Precision Recall
## 0.4717380 0.8147302 0.8651564
## F1 Prevalence Detection Rate
## 0.8391864 0.7592138 0.6568387
## Detection Prevalence Balanced Accuracy
## 0.8062039 0.6224166
# Elements 5 to 7 of the per-class vector: precision, recall and F1.
ad_tda_kde_5.60.5_n5_nn1_cf0_pre_rec_f1 <- ad_tda_kde_5.60.5_n5_nn1_cf0[["byClass"]][5:7]
###### Conduct initial Bayesian tests of non-tda-assisted NN vs. tda-assisted NN classifiers
### 3-fold diff
# Row-by-row difference of the two per-fold accuracy columns:
# ad_nn1_fit_re (presumably the non-TDA NN folds; defined earlier in the
# file) minus the TDA-assisted folds.
diff_tda_kde_5.60.5_nn1_n5_3_fold <- ad_nn1_fit_re - ad_tda_kde_5.60.5_n5_nn1_fit_re
diff_tda_kde_5.60.5_nn1_n5_3_fold
## Accuracy
## 1 -0.06017030
## 2 -0.02306223
## 3 -0.06269886
## Bayesian Tests 3-fold diff
# Bayesian Sign Test (last two arguments: -0.01 and 0.01, the ROPE bounds)
bst_tda_kde_5.60.5_nn1.n5_3_fold <- BayesianSignTest(
  as.matrix(diff_tda_kde_5.60.5_nn1_n5_3_fold), -0.01, 0.01
)
bst_tda_kde_5.60.5_nn1.n5_3_fold
## $probLeft
## [1] 0.75
##
## $probRope
## [1] 0.25
##
## $probRight
## [1] 0
# Odds Left Bayesian Sign Test: probLeft over probRight (Inf when probRight
# is 0, as in the recorded output)
bst_tda_kde_5.60.5_nn1.n5_3_fold_odds.left <-
  bst_tda_kde_5.60.5_nn1.n5_3_fold[["probLeft"]] /
  bst_tda_kde_5.60.5_nn1.n5_3_fold[["probRight"]]
bst_tda_kde_5.60.5_nn1.n5_3_fold_odds.left
## [1] Inf
# Bayesian Signed Rank Test
bsr_tda_kde_5.60.5_nn1.n5_3_fold <- BayesianSignedRank(
  as.matrix(diff_tda_kde_5.60.5_nn1_n5_3_fold), -0.01, 0.01
)
bsr_tda_kde_5.60.5_nn1.n5_3_fold
## $winLeft
## [1] 0.9916667
##
## $winRope
## [1] 0.008333333
##
## $winRight
## [1] 0
# Bayesian Correlated Test (second argument 0.1 precedes the ROPE bounds)
bct_tda_kde_5.60.5_nn1.n5_3_fold <- correlatedBayesianTtest(
  as.matrix(diff_tda_kde_5.60.5_nn1_n5_3_fold), 0.1, -0.01, 0.01
)
bct_tda_kde_5.60.5_nn1.n5_3_fold
## $left
## [1] 0.9396975
##
## $rope
## [1] 0.03123185
##
## $right
## [1] 0.02907065
# Rope Plot: ROPE summary of the fold differences for the range [-0.01, 0.01]
plot(rope(diff_tda_kde_5.60.5_nn1_n5_3_fold, c(-0.01, 0.01)))

#BayesFactor
#bf_tda_kde_5.60.5_nn1.n5_3_fold = ttestBF(x = as.matrix(diff_tda_kde_5.60.5_nn1_n5_3_fold))
#bf_tda_kde_5.60.5_nn1.n5_3_fold
#t_test
# One-sample t-test of the fold differences against a mean of 0.
t.test(x = as.matrix(diff_tda_kde_5.60.5_nn1_n5_3_fold))
##
## One Sample t-test
##
## data: as.matrix(diff_tda_kde_5.60.5_nn1_n5_3_fold)
## t = -3.7969, df = 2, p-value = 0.06289
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
## -0.103767627 0.006480036
## sample estimates:
## mean of x
## -0.0486438
### Test set diff
# Single test-set accuracy difference: nn1_cf_ov_acc (defined earlier in the
# file) minus the TDA-assisted model's test accuracy.
diff_tda_kde_5.60.5_nn1.n5_test <- nn1_cf_ov_acc - ad_tda_kde_5.60.5_n5_nn1_cf0_ov_acc
diff_tda_kde_5.60.5_nn1.n5_test
## Accuracy
## 0.05681818
## Bayesian Tests Test set diff
# Bayesian Sign Test
bst_tda_kde_5.60.5_nn1.n5_test <- BayesianSignTest(
  as.matrix(diff_tda_kde_5.60.5_nn1.n5_test), -0.01, 0.01
)
bst_tda_kde_5.60.5_nn1.n5_test
## $probLeft
## [1] 0
##
## $probRope
## [1] 0.5
##
## $probRight
## [1] 0.5
# Odds Left Bayesian Sign Test: probLeft over probRight
bst_tda_kde_5.60.5_nn1.n5_test_odds.left <-
  bst_tda_kde_5.60.5_nn1.n5_test[["probLeft"]] /
  bst_tda_kde_5.60.5_nn1.n5_test[["probRight"]]
bst_tda_kde_5.60.5_nn1.n5_test_odds.left
## [1] 0
# Bayesian Signed Rank Test
# Fixed copy-paste slip: this node-5 section previously read and wrote the
# node-4 names (`..._nn1.n4_test`), so the node-5 result was never created.
# NOTE: the output recorded directly below was produced by the node-4 inputs;
# re-run the document to refresh it.
bsr_tda_kde_5.60.5_nn1.n5_test <- BayesianSignedRank(
  as.matrix(diff_tda_kde_5.60.5_nn1.n5_test), -0.01, 0.01
)
bsr_tda_kde_5.60.5_nn1.n5_test
## $winLeft
## [1] 0
##
## $winRope
## [1] 1
##
## $winRight
## [1] 0
# Bayesian Correlated Test
# The input here is a single test-set difference; the recorded result for it
# is NA for left, rope and right.
bct_tda_kde_5.60.5_nn1.n5_test <- correlatedBayesianTtest(
  as.matrix(diff_tda_kde_5.60.5_nn1.n5_test), 0.1, -0.01, 0.01
)
bct_tda_kde_5.60.5_nn1.n5_test
## $left
## [1] NA
##
## $rope
## [1] NA
##
## $right
## [1] NA
# Rope Plot
#plot(rope(diff_tda_kde_5.60.5_nn1.n5_test,c(-0.01,0.01)))
#BayesFactor
#bf_tda_kde_5.60.5_nn1.n5_test = ttestBF(x = as.matrix(diff_tda_kde_5.60.5_nn1.n5_test))
#bf_tda_kde_5.60.5_nn1.n5_test
#t_test
#t.test(as.matrix(diff_tda_kde_5.60.5_nn1.n5_test))
## Logistic Regression
# Binomial GLM fitted through train() on the one-hot training data, using the
# resampling scheme in fitControl and accuracy as the summary metric.
adultLrFit <- train(
  as.factor(adult_df1) ~ .,
  data = adult.one_hot_df4Train,
  method = "glm",
  family = "binomial",
  metric = "Accuracy",
  trControl = fitControl
)
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning in predict.lm(object, newdata, se.fit, scale = 1, type = if (type == :
## prediction from rank-deficient fit; attr(*, "non-estim") has doubtful cases
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning in predict.lm(object, newdata, se.fit, scale = 1, type = if (type == :
## prediction from rank-deficient fit; attr(*, "non-estim") has doubtful cases
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning in predict.lm(object, newdata, se.fit, scale = 1, type = if (type == :
## prediction from rank-deficient fit; attr(*, "non-estim") has doubtful cases
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
# Print the fitted model object: the recorded output reports the sample and
# predictor counts and the 3-fold cross-validated accuracy and kappa.
adultLrFit
## Generalized Linear Model
##
## 22793 samples
## 108 predictor
## 2 classes: ' <=50K', ' >50K'
##
## No pre-processing
## Resampling: Cross-Validated (3 fold)
## Summary of sample sizes: 15195, 15196, 15195
## Resampling results:
##
## Accuracy Kappa
## 0.8510945 0.5676792
# Per-fold accuracy and kappa of the logistic regression.
adultLrFit[["resample"]]
## Accuracy Kappa Resample
## 1 0.8496973 0.5632382 Fold1
## 2 0.8482296 0.5620409 Fold2
## 3 0.8553567 0.5777586 Fold3
# Keep only the first column (per-fold accuracy) as a one-column data frame.
ad_lr_fit_re <- adultLrFit[["resample"]][1]
# Coefficient table and fit statistics of the final GLM.
summary(adultLrFit)
##
## Call:
## NULL
##
## Coefficients: (9 not defined because of singularities)
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -1.023e+13 6.477e+12 -1.580 0.114062
## V1 2.877e-02 1.982e-03 14.517 < 2e-16 ***
## V2.. 1.023e+13 6.477e+12 1.580 0.114062
## V2.Federal.gov 1.023e+13 6.477e+12 1.580 0.114062
## V2.Local.gov 1.023e+13 6.477e+12 1.580 0.114062
## V2.Never.worked -4.493e+15 6.477e+12 -693.776 < 2e-16 ***
## V2.Private 1.023e+13 6.477e+12 1.580 0.114062
## V2.Self.emp.inc 1.023e+13 6.477e+12 1.580 0.114062
## V2.Self.emp.not.inc 1.023e+13 6.477e+12 1.580 0.114062
## V2.State.gov 1.023e+13 6.477e+12 1.580 0.114062
## V2.Without.pay 1.023e+13 6.477e+12 1.580 0.114062
## V3 6.829e-07 2.062e-07 3.311 0.000929 ***
## V4.10th -1.171e+00 1.825e-01 -6.415 1.41e-10 ***
## V4.11th -1.000e+00 1.819e-01 -5.498 3.83e-08 ***
## V4.12th -7.853e-01 2.752e-01 -2.853 0.004333 **
## V4.1st.4th -1.871e+00 6.099e-01 -3.067 0.002162 **
## V4.5th.6th -1.244e+00 3.324e-01 -3.743 0.000182 ***
## V4.7th.8th -1.593e+00 2.140e-01 -7.447 9.58e-14 ***
## V4.9th -1.634e+00 2.940e-01 -5.558 2.73e-08 ***
## V4.Assoc.acdm 2.427e-01 1.188e-01 2.043 0.041024 *
## V4.Assoc.voc 2.679e-01 1.016e-01 2.637 0.008373 **
## V4.Bachelors 7.912e-01 6.671e-02 11.861 < 2e-16 ***
## V4.Doctorate 2.068e+00 1.960e-01 10.554 < 2e-16 ***
## V4.HS.grad -3.188e-01 6.025e-02 -5.291 1.21e-07 ***
## V4.Masters 1.203e+00 9.717e-02 12.381 < 2e-16 ***
## V4.Preschool -2.478e+01 4.565e+04 -0.001 0.999567
## V4.Prof.school 1.768e+00 1.640e-01 10.778 < 2e-16 ***
## V4.Some.college NA NA NA NA
## V5 NA NA NA NA
## V6.Divorced -2.067e-01 1.836e-01 -1.126 0.260179
## V6.Married.AF.spouse 2.268e+00 6.704e-01 3.383 0.000716 ***
## V6.Married.civ.spouse 2.016e+00 3.637e-01 5.543 2.97e-08 ***
## V6.Married.spouse.absent -2.659e-01 3.270e-01 -0.813 0.416064
## V6.Never.married -5.888e-01 1.902e-01 -3.096 0.001963 **
## V6.Separated -9.858e-02 2.441e-01 -0.404 0.686323
## V6.Widowed NA NA NA NA
## V7.. NA NA NA NA
## V7.Adm.clerical 2.023e-01 1.192e-01 1.697 0.089726 .
## V7.Armed.Forces -7.939e-01 1.624e+00 -0.489 0.624962
## V7.Craft.repair 2.979e-01 1.019e-01 2.923 0.003466 **
## V7.Exec.managerial 1.007e+00 1.044e-01 9.650 < 2e-16 ***
## V7.Farming.fishing -8.628e-01 1.757e-01 -4.910 9.12e-07 ***
## V7.Handlers.cleaners -5.935e-01 1.771e-01 -3.351 0.000806 ***
## V7.Machine.op.inspct -2.407e-02 1.274e-01 -0.189 0.850211
## V7.Other.service -6.090e-01 1.490e-01 -4.087 4.38e-05 ***
## V7.Priv.house.serv -3.405e+00 1.946e+00 -1.750 0.080130 .
## V7.Prof.specialty 6.747e-01 1.122e-01 6.012 1.83e-09 ***
## V7.Protective.serv 7.220e-01 1.550e-01 4.658 3.20e-06 ***
## V7.Sales 4.921e-01 1.077e-01 4.570 4.88e-06 ***
## V7.Tech.support 9.149e-01 1.416e-01 6.461 1.04e-10 ***
## V7.Transport.moving NA NA NA NA
## V8.Husband -1.421e+00 1.220e-01 -11.642 < 2e-16 ***
## V8.Not.in.family -9.011e-01 3.368e-01 -2.675 0.007467 **
## V8.Other.relative -1.859e+00 3.008e-01 -6.181 6.38e-10 ***
## V8.Own.child -2.126e+00 3.330e-01 -6.383 1.74e-10 ***
## V8.Unmarried -1.056e+00 3.482e-01 -3.033 0.002418 **
## V8.Wife NA NA NA NA
## V9.Amer.Indian.Eskimo -7.414e-01 2.697e-01 -2.749 0.005977 **
## V9.Asian.Pac.Islander 4.763e-02 1.879e-01 0.253 0.799923
## V9.Black -1.683e-01 9.175e-02 -1.835 0.066569 .
## V9.Other -3.401e-01 3.230e-01 -1.053 0.292291
## V9.White NA NA NA NA
## V10.Female -8.625e-01 9.424e-02 -9.152 < 2e-16 ***
## V10.Male NA NA NA NA
## V11 3.164e-04 1.239e-05 25.527 < 2e-16 ***
## V12 6.201e-04 4.394e-05 14.113 < 2e-16 ***
## V13 2.963e-02 1.935e-03 15.312 < 2e-16 ***
## V14.. -4.819e-01 7.233e-01 -0.666 0.505218
## V14.Cambodia 1.147e+00 1.099e+00 1.043 0.296775
## V14.Canada -1.529e-01 7.754e-01 -0.197 0.843649
## V14.China -8.039e-01 8.419e-01 -0.955 0.339658
## V14.Columbia -1.342e+00 1.150e+00 -1.168 0.242916
## V14.Cuba 2.932e-01 7.928e-01 0.370 0.711537
## V14.Dominican.Republic -1.810e+00 1.268e+00 -1.427 0.153437
## V14.Ecuador -2.579e-01 1.069e+00 -0.241 0.809409
## V14.El.Salvador -8.031e-01 9.135e-01 -0.879 0.379330
## V14.England 1.845e-01 7.991e-01 0.231 0.817367
## V14.France 2.495e-01 1.007e+00 0.248 0.804303
## V14.Germany 1.113e-01 7.687e-01 0.145 0.884870
## V14.Greece -1.073e+00 9.239e-01 -1.161 0.245646
## V14.Guatemala 2.472e-01 1.066e+00 0.232 0.816614
## V14.Haiti -1.946e+00 1.419e+00 -1.371 0.170353
## V14.Holand.Netherlands -2.264e+01 3.459e+05 0.000 0.999948
## V14.Honduras -1.543e+00 2.583e+00 -0.597 0.550296
## V14.Hong 3.604e-02 1.018e+00 0.035 0.971757
## V14.Hungary -4.154e-01 1.208e+00 -0.344 0.731004
## V14.India -7.107e-01 8.101e-01 -0.877 0.380320
## V14.Iran -1.027e-01 8.592e-01 -0.120 0.904856
## V14.Ireland 4.688e-01 1.051e+00 0.446 0.655675
## V14.Italy 3.216e-01 8.264e-01 0.389 0.697120
## V14.Jamaica -1.134e+00 9.678e-01 -1.172 0.241185
## V14.Japan 3.956e-01 8.559e-01 0.462 0.643909
## V14.Laos -1.050e+00 1.320e+00 -0.795 0.426381
## V14.Mexico -6.909e-01 7.463e-01 -0.926 0.354542
## V14.Nicaragua -2.445e+01 5.937e+04 0.000 0.999671
## V14.Outlying.US.Guam.USVI.etc. -2.389e+01 9.718e+04 0.000 0.999804
## V14.Peru -1.208e+00 1.440e+00 -0.839 0.401520
## V14.Philippines 1.230e-02 7.716e-01 0.016 0.987281
## V14.Poland -4.460e-01 8.546e-01 -0.522 0.601727
## V14.Portugal -2.091e-01 1.041e+00 -0.201 0.840857
## V14.Puerto.Rico -1.305e+00 8.687e-01 -1.502 0.133081
## V14.Scotland -1.326e-01 1.076e+00 -0.123 0.901896
## V14.South -1.915e+00 9.039e-01 -2.118 0.034147 *
## V14.Taiwan -4.376e-01 9.296e-01 -0.471 0.637811
## V14.Thailand -3.764e-01 1.237e+00 -0.304 0.761007
## V14.Trinadad.Tobago -4.231e-01 1.156e+00 -0.366 0.714328
## V14.United.States -1.082e-01 7.053e-01 -0.153 0.878016
## V14.Vietnam -1.432e+00 9.958e-01 -1.438 0.150307
## V14.Yugoslavia NA NA NA NA
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 25165 on 22792 degrees of freedom
## Residual deviance: 14343 on 22693 degrees of freedom
## AIC: 14543
##
## Number of Fisher Scoring iterations: 25
# Variable-importance plot for the logistic regression; 25 is passed
# positionally as vip's second argument.
vip(adultLrFit, 25) +
  ggtitle("non-TDA-Assisted LR")

# Score the held-out test data with the model fitted on the training data.
predictions <- predict(
  adultLrFit,
  newdata = adult.one_hot_df4Test
)
## Warning in predict.lm(object, newdata, se.fit, scale = 1, type = if (type == :
## prediction from rank-deficient fit; attr(*, "non-estim") has doubtful cases
# Cross-tabulate the predictions against the observed test labels to assess
# model performance, then print the result.
lr_cf <- confusionMatrix(
  data = predictions,
  reference = as.factor(adult.one_hot_df4Test$adult_df1)
)
lr_cf
## Confusion Matrix and Statistics
##
## Reference
## Prediction <=50K >50K
## <=50K 6923 940
## >50K 493 1412
##
## Accuracy : 0.8533
## 95% CI : (0.8461, 0.8603)
## No Information Rate : 0.7592
## P-Value [Acc > NIR] : < 2.2e-16
##
## Kappa : 0.5709
##
## Mcnemar's Test P-Value : < 2.2e-16
##
## Sensitivity : 0.9335
## Specificity : 0.6003
## Pos Pred Value : 0.8805
## Neg Pred Value : 0.7412
## Prevalence : 0.7592
## Detection Rate : 0.7087
## Detection Prevalence : 0.8050
## Balanced Accuracy : 0.7669
##
## 'Positive' Class : <=50K
##
# Overall test-set statistics (accuracy, kappa, accuracy bounds, p-values).
lr_cf[["overall"]]
## Accuracy Kappa AccuracyLower AccuracyUpper AccuracyNull
## 8.532965e-01 5.709073e-01 8.461236e-01 8.602580e-01 7.592138e-01
## AccuracyPValue McnemarPValue
## 7.606349e-117 4.844546e-32
# First element of the overall vector, i.e. the test-set accuracy.
lr_cf_ov_acc <- lr_cf[["overall"]][1]
# Per-class statistics for the positive class.
lr_cf[["byClass"]]
## Sensitivity Specificity Pos Pred Value
## 0.9335221 0.6003401 0.8804528
## Neg Pred Value Precision Recall
## 0.7412073 0.8804528 0.9335221
## F1 Prevalence Detection Rate
## 0.9062111 0.7592138 0.7087428
## Detection Prevalence Balanced Accuracy
## 0.8049754 0.7669311
# Elements 5 to 7 of the per-class vector: precision, recall and F1.
lr_cf_pre_rec_f1 <- lr_cf[["byClass"]][5:7]
##With TDA PCA filter 5 intervals, 50% overlap, 5 bins
##Node1
# NOTE(review): the object names carry "5.60.5"; confirm whether the overlap
# is 50% (as the header says) or 60%.
# Binomial GLM fitted through train() on tda.m_adult_5.60.5.n1.vec, using the
# resampling scheme in fitControl and accuracy as the summary metric.
# A stand-alone glm() fit that was assigned to this same name has been
# removed: it was overwritten by this call before anything read it, so it
# only added a second non-converging fit and its warnings.
Adult_TDA_PC_5.60.5_n1_LrFit0 <- train(
  as.factor(adult_df1) ~ .,
  data = tda.m_adult_5.60.5.n1.vec,
  family = "binomial",
  method = "glm",
  trControl = fitControl,
  metric = "Accuracy"
)
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning in predict.lm(object, newdata, se.fit, scale = 1, type = if (type == :
## prediction from rank-deficient fit; attr(*, "non-estim") has doubtful cases
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning in predict.lm(object, newdata, se.fit, scale = 1, type = if (type == :
## prediction from rank-deficient fit; attr(*, "non-estim") has doubtful cases
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning in predict.lm(object, newdata, se.fit, scale = 1, type = if (type == :
## prediction from rank-deficient fit; attr(*, "non-estim") has doubtful cases
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
# Print the fitted model object: the recorded output reports the sample and
# predictor counts and the 3-fold cross-validated accuracy and kappa.
Adult_TDA_PC_5.60.5_n1_LrFit0
## Generalized Linear Model
##
## 6560 samples
## 108 predictor
## 2 classes: ' <=50K', ' >50K'
##
## No pre-processing
## Resampling: Cross-Validated (3 fold)
## Summary of sample sizes: 4373, 4374, 4373
## Resampling results:
##
## Accuracy Kappa
## 0.8458861 0.1724515
# Per-fold accuracy and kappa of the node-1 logistic regression.
Adult_TDA_PC_5.60.5_n1_LrFit0[["resample"]]
## Accuracy Kappa Resample
## 1 0.7869227 0.09940988 Fold1
## 2 0.8586459 0.18178118 Fold2
## 3 0.8920896 0.23616344 Fold3
# Keep only the first column (per-fold accuracy) as a one-column data frame.
ad_tda_pc_5.60.5_n1_lr_fit_re <- Adult_TDA_PC_5.60.5_n1_LrFit0[["resample"]][1]
# Coefficient table and fit statistics of the final GLM.
summary(Adult_TDA_PC_5.60.5_n1_LrFit0)
##
## Call:
## NULL
##
## Coefficients: (18 not defined because of singularities)
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -2.456e+13 1.392e+13 -1.764 0.077701 .
## V1 -4.313e-02 4.837e-03 -8.916 < 2e-16 ***
## V2.. 1.632e+00 7.185e-01 2.271 0.023121 *
## V2.Federal.gov 8.587e-02 3.110e-01 0.276 0.782470
## V2.Local.gov -4.136e-01 2.410e-01 -1.717 0.086049 .
## V2.Never.worked NA NA NA NA
## V2.Private 1.231e+00 2.188e-01 5.624 1.86e-08 ***
## V2.Self.emp.inc -7.838e-01 2.310e-01 -3.393 0.000691 ***
## V2.Self.emp.not.inc -1.234e+00 2.226e-01 -5.543 2.98e-08 ***
## V2.State.gov NA NA NA NA
## V2.Without.pay NA NA NA NA
## V3 1.258e-06 5.262e-07 2.390 0.016853 *
## V4.10th 7.061e-01 9.483e-01 0.745 0.456494
## V4.11th 2.465e+01 7.928e+04 0.000 0.999752
## V4.12th -1.235e-01 1.193e+00 -0.104 0.917520
## V4.1st.4th 2.282e+01 2.834e+05 0.000 0.999936
## V4.5th.6th 2.476e+01 1.585e+05 0.000 0.999875
## V4.7th.8th 9.693e-01 6.177e-01 1.569 0.116582
## V4.9th 4.559e-02 8.163e-01 0.056 0.955460
## V4.Assoc.acdm -6.941e-01 3.288e-01 -2.111 0.034778 *
## V4.Assoc.voc -7.554e-01 2.730e-01 -2.767 0.005661 **
## V4.Bachelors -1.211e+00 1.778e-01 -6.813 9.55e-12 ***
## V4.Doctorate -9.051e-01 2.759e-01 -3.281 0.001036 **
## V4.HS.grad 1.309e-01 1.997e-01 0.655 0.512250
## V4.Masters -1.181e+00 2.049e-01 -5.764 8.22e-09 ***
## V4.Preschool NA NA NA NA
## V4.Prof.school -4.743e-01 2.596e-01 -1.827 0.067680 .
## V4.Some.college NA NA NA NA
## V5 NA NA NA NA
## V6.Divorced 2.456e+13 1.392e+13 1.764 0.077701 .
## V6.Married.AF.spouse 2.456e+13 1.392e+13 1.764 0.077701 .
## V6.Married.civ.spouse 2.456e+13 1.392e+13 1.764 0.077701 .
## V6.Married.spouse.absent 2.456e+13 1.392e+13 1.764 0.077701 .
## V6.Never.married 2.456e+13 1.392e+13 1.764 0.077701 .
## V6.Separated 2.456e+13 1.392e+13 1.764 0.077701 .
## V6.Widowed 2.456e+13 1.392e+13 1.764 0.077701 .
## V7.. NA NA NA NA
## V7.Adm.clerical 1.935e+00 8.107e-01 2.387 0.016982 *
## V7.Armed.Forces 2.422e+01 5.165e+05 0.000 0.999963
## V7.Craft.repair -3.594e-01 3.351e-01 -1.073 0.283478
## V7.Exec.managerial -2.004e-01 3.206e-01 -0.625 0.531873
## V7.Farming.fishing -1.233e+00 3.453e-01 -3.571 0.000355 ***
## V7.Handlers.cleaners 2.362e+01 6.352e+04 0.000 0.999703
## V7.Machine.op.inspct 1.127e+00 8.224e-01 1.371 0.170500
## V7.Other.service 5.935e-01 8.365e-01 0.709 0.478035
## V7.Priv.house.serv NA NA NA NA
## V7.Prof.specialty -4.022e-01 3.327e-01 -1.209 0.226602
## V7.Protective.serv 4.992e-01 4.674e-01 1.068 0.285536
## V7.Sales 3.841e-01 3.411e-01 1.126 0.260134
## V7.Tech.support 1.094e+00 6.234e-01 1.755 0.079280 .
## V7.Transport.moving NA NA NA NA
## V8.Husband -2.370e+01 5.521e+05 0.000 0.999966
## V8.Not.in.family 3.308e+00 6.479e+05 0.000 0.999996
## V8.Other.relative 1.351e+00 5.988e+05 0.000 0.999998
## V8.Own.child 1.028e+00 7.591e+05 0.000 0.999999
## V8.Unmarried 2.796e+00 6.480e+05 0.000 0.999997
## V8.Wife NA NA NA NA
## V9.Amer.Indian.Eskimo 2.450e+01 1.198e+05 0.000 0.999837
## V9.Asian.Pac.Islander 1.134e+00 5.608e-01 2.022 0.043224 *
## V9.Black 2.364e+00 7.597e-01 3.111 0.001862 **
## V9.Other 2.051e-01 1.147e+00 0.179 0.858058
## V9.White NA NA NA NA
## V10.Female 1.992e+00 5.550e+05 0.000 0.999997
## V10.Male NA NA NA NA
## V11 2.186e-04 2.559e-05 8.545 < 2e-16 ***
## V12 5.306e-04 9.479e-05 5.597 2.18e-08 ***
## V13 -3.835e-02 3.793e-03 -10.112 < 2e-16 ***
## V14.. 4.369e-01 1.284e+00 0.340 0.733624
## V14.Cambodia 2.410e+01 2.258e+05 0.000 0.999915
## V14.Canada 2.272e-01 1.380e+00 0.165 0.869231
## V14.China 3.653e-01 1.730e+00 0.211 0.832771
## V14.Columbia -1.012e+00 1.993e+00 -0.508 0.611573
## V14.Cuba 8.292e-01 1.488e+00 0.557 0.577398
## V14.Dominican.Republic NA NA NA NA
## V14.Ecuador 2.415e+01 2.787e+05 0.000 0.999931
## V14.El.Salvador 2.500e+01 1.882e+05 0.000 0.999894
## V14.England 1.656e+00 1.666e+00 0.994 0.320287
## V14.France -9.396e-01 1.521e+00 -0.618 0.536688
## V14.Germany 7.750e-01 1.485e+00 0.522 0.601816
## V14.Greece -1.693e+00 1.398e+00 -1.210 0.226109
## V14.Guatemala 2.169e+01 5.312e+05 0.000 0.999967
## V14.Haiti NA NA NA NA
## V14.Holand.Netherlands NA NA NA NA
## V14.Honduras -1.273e+00 5.331e+05 0.000 0.999998
## V14.Hong -2.072e+00 1.801e+00 -1.150 0.249956
## V14.Hungary -5.727e-01 1.819e+00 -0.315 0.752907
## V14.India -1.720e+00 1.405e+00 -1.224 0.221006
## V14.Iran -1.324e+00 1.384e+00 -0.957 0.338707
## V14.Ireland 2.515e+01 2.363e+05 0.000 0.999915
## V14.Italy 2.553e+01 1.080e+05 0.000 0.999811
## V14.Jamaica 2.181e+01 2.216e+05 0.000 0.999921
## V14.Japan -1.285e+00 1.490e+00 -0.862 0.388419
## V14.Laos 2.246e+01 4.943e+05 0.000 0.999964
## V14.Mexico 1.498e+00 1.671e+00 0.897 0.369948
## V14.Nicaragua 2.371e+01 5.176e+05 0.000 0.999963
## V14.Outlying.US.Guam.USVI.etc. NA NA NA NA
## V14.Peru 2.338e+01 3.689e+05 0.000 0.999949
## V14.Philippines -4.424e-01 1.483e+00 -0.298 0.765530
## V14.Poland 2.481e+01 1.578e+05 0.000 0.999875
## V14.Portugal 2.603e+01 3.673e+05 0.000 0.999943
## V14.Puerto.Rico 2.581e+01 2.089e+05 0.000 0.999901
## V14.Scotland 2.722e+01 2.884e+05 0.000 0.999925
## V14.South -9.111e-01 1.561e+00 -0.584 0.559482
## V14.Taiwan -3.596e-01 1.584e+00 -0.227 0.820348
## V14.Thailand 2.479e+01 3.462e+05 0.000 0.999943
## V14.Trinadad.Tobago NA NA NA NA
## V14.United.States 4.263e-01 1.231e+00 0.346 0.729203
## V14.Vietnam 2.250e+01 3.359e+05 0.000 0.999947
## V14.Yugoslavia NA NA NA NA
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 4412.6 on 6559 degrees of freedom
## Residual deviance: 3007.7 on 6469 degrees of freedom
## AIC: 3189.7
##
## Number of Fisher Scoring iterations: 25
# Variable-importance plot for the node-1 TDA-assisted LR model, showing up to
# 50 predictors.
vip(Adult_TDA_PC_5.60.5_n1_LrFit0, num_features = 50) +
  ggtitle("Adult_TDA_PCA_5.60.5_n1_Lr1Fit TDA-Assisted LR")

# Predict the outcome for the test data with the node-1 model
# (Adult_TDA_PC_5.60.5_n1_LrFit0).
pred0 <- predict(
  Adult_TDA_PC_5.60.5_n1_LrFit0,
  newdata = adult.one_hot_df4Test
)
## Warning in predict.lm(object, newdata, se.fit, scale = 1, type = if (type == :
## prediction from rank-deficient fit; attr(*, "non-estim") has doubtful cases
# Confusion matrix of the node-1 predictions against the observed test labels,
# used to assess model performance on the test data.
ad_tda_pc_5.60.5_n1_lr_cf0 <- confusionMatrix(
  data = pred0,
  reference = as.factor(adult.one_hot_df4Test$adult_df1)
)
ad_tda_pc_5.60.5_n1_lr_cf0
## Confusion Matrix and Statistics
##
## Reference
## Prediction <=50K >50K
## <=50K 54 27
## >50K 7362 2325
##
## Accuracy : 0.2436
## 95% CI : (0.2351, 0.2522)
## No Information Rate : 0.7592
## P-Value [Acc > NIR] : 1
##
## Kappa : -0.002
##
## Mcnemar's Test P-Value : <2e-16
##
## Sensitivity : 0.007282
## Specificity : 0.988520
## Pos Pred Value : 0.666667
## Neg Pred Value : 0.240012
## Prevalence : 0.759214
## Detection Rate : 0.005528
## Detection Prevalence : 0.008292
## Balanced Accuracy : 0.497901
##
## 'Positive' Class : <=50K
##
ad_tda_pc_5.60.5_n1_lr_cf0
## Confusion Matrix and Statistics
##
## Reference
## Prediction <=50K >50K
## <=50K 54 27
## >50K 7362 2325
##
## Accuracy : 0.2436
## 95% CI : (0.2351, 0.2522)
## No Information Rate : 0.7592
## P-Value [Acc > NIR] : 1
##
## Kappa : -0.002
##
## Mcnemar's Test P-Value : <2e-16
##
## Sensitivity : 0.007282
## Specificity : 0.988520
## Pos Pred Value : 0.666667
## Neg Pred Value : 0.240012
## Prevalence : 0.759214
## Detection Rate : 0.005528
## Detection Prevalence : 0.008292
## Balanced Accuracy : 0.497901
##
## 'Positive' Class : <=50K
##
ad_tda_pc_5.60.5_n1_lr_cf0$overall
## Accuracy Kappa AccuracyLower AccuracyUpper AccuracyNull
## 0.243550369 -0.002033172 0.235064668 0.252189231 0.759213759
## AccuracyPValue McnemarPValue
## 1.000000000 0.000000000
# Overall test-set accuracy of the node-1 model (first element of $overall).
ad_tda_pc_5.60.5_n1_lr_cf0_ov_acc <- ad_tda_pc_5.60.5_n1_lr_cf0$overall["Accuracy"]
ad_tda_pc_5.60.5_n1_lr_cf0$byClass
## Sensitivity Specificity Pos Pred Value
## 0.007281553 0.988520408 0.666666667
## Neg Pred Value Precision Recall
## 0.240012388 0.666666667 0.007281553
## F1 Prevalence Detection Rate
## 0.014405762 0.759213759 0.005528256
## Detection Prevalence Balanced Accuracy
## 0.008292383 0.497900981
# Precision, recall and F1 of the node-1 model (elements 5 to 7 of $byClass).
ad_tda_pc_5.60.5_n1_lr_cf0_pre_rec_f1 <-
  ad_tda_pc_5.60.5_n1_lr_cf0$byClass[c("Precision", "Recall", "F1")]
###### Initial Bayesian tests: non-TDA-assisted LR vs. TDA-assisted LR (node 1)
### Per-fold accuracy difference (3-fold): ad_lr_fit_re minus the node-1 model
diff_tda_pca_5.60.5_lr_n1_3_fold <- ad_lr_fit_re - ad_tda_pc_5.60.5_n1_lr_fit_re
diff_tda_pca_5.60.5_lr_n1_3_fold
## Accuracy
## 1 0.06277456
## 2 -0.01041636
## 3 -0.03673295
## Bayesian tests on the 3-fold differences
## (-0.01 and 0.01 are presumably the ROPE limits, as in the rope() call below)
# Bayesian sign test
bst_tda_pca_5.60.5_lr.n1_3_fold <- BayesianSignTest(
  as.matrix(diff_tda_pca_5.60.5_lr_n1_3_fold), -0.01, 0.01
)
bst_tda_pca_5.60.5_lr.n1_3_fold
## $probLeft
## [1] 0.5
##
## $probRope
## [1] 0.25
##
## $probRight
## [1] 0.25
# Odds of "left" vs. "right" from the Bayesian sign test
bst_tda_pca_5.60.5_lr.n1_3_fold_odds.left <-
  bst_tda_pca_5.60.5_lr.n1_3_fold$probLeft / bst_tda_pca_5.60.5_lr.n1_3_fold$probRight
bst_tda_pca_5.60.5_lr.n1_3_fold_odds.left
## [1] 2
# Bayesian signed-rank test
bsr_tda_pca_5.60.5_lr.n1_3_fold <- BayesianSignedRank(
  as.matrix(diff_tda_pca_5.60.5_lr_n1_3_fold), -0.01, 0.01
)
bsr_tda_pca_5.60.5_lr.n1_3_fold
## $winLeft
## [1] 0.4649
##
## $winRope
## [1] 0.06286667
##
## $winRight
## [1] 0.4722333
# Bayesian correlated t-test
# NOTE(review): the second argument (0.1) is presumably the correlation rho;
# with 3-fold resampling 1/3 would be the usual choice -- confirm against the
# definition of correlatedBayesianTtest.
bct_tda_pca_5.60.5_lr.n1_3_fold <- correlatedBayesianTtest(
  as.matrix(diff_tda_pca_5.60.5_lr_n1_3_fold), 0.1, -0.01, 0.01
)
bct_tda_pca_5.60.5_lr.n1_3_fold
## $left
## [1] 0.3507095
##
## $rope
## [1] 0.1983366
##
## $right
## [1] 0.4509539
# ROPE plot for the range [-0.01, 0.01]
plot(rope(diff_tda_pca_5.60.5_lr_n1_3_fold, c(-0.01, 0.01)))

# Bayes factor (disabled)
# bf_tda_pca_5.60.5_lr.n1_3_fold = ttestBF(x = as.matrix(diff_tda_pca_5.60.5_lr_n1_3_fold))
# bf_tda_pca_5.60.5_lr.n1_3_fold
# One-sample t-test on the fold differences
t.test(as.matrix(diff_tda_pca_5.60.5_lr_n1_3_fold))
##
## One Sample t-test
##
## data: as.matrix(diff_tda_pca_5.60.5_lr_n1_3_fold)
## t = 0.17496, df = 2, p-value = 0.8772
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
## -0.1228762 0.1332930
## sample estimates:
## mean of x
## 0.005208417
### Test-set accuracy difference: lr_cf_ov_acc minus the node-1 model accuracy
diff_tda_pca_5.60.5_lr.n1_test <- lr_cf_ov_acc - ad_tda_pc_5.60.5_n1_lr_cf0_ov_acc
diff_tda_pca_5.60.5_lr.n1_test
## Accuracy
## 0.6097461
## Bayesian tests on the test-set difference (a single value)
# Bayesian sign test
bst_tda_pca_5.60.5_lr.n1_test <- BayesianSignTest(
  as.matrix(diff_tda_pca_5.60.5_lr.n1_test), -0.01, 0.01
)
bst_tda_pca_5.60.5_lr.n1_test
## $probLeft
## [1] 0
##
## $probRope
## [1] 0.5
##
## $probRight
## [1] 0.5
# Odds of "left" vs. "right" from the Bayesian sign test
bst_tda_pca_5.60.5_lr.n1_test_odds.left <-
  bst_tda_pca_5.60.5_lr.n1_test$probLeft / bst_tda_pca_5.60.5_lr.n1_test$probRight
bst_tda_pca_5.60.5_lr.n1_test_odds.left
## [1] 0
# Bayesian signed-rank test
bsr_tda_pca_5.60.5_lr.n1_test <- BayesianSignedRank(
  as.matrix(diff_tda_pca_5.60.5_lr.n1_test), -0.01, 0.01
)
bsr_tda_pca_5.60.5_lr.n1_test
## $winLeft
## [1] 0
##
## $winRope
## [1] 0.1614667
##
## $winRight
## [1] 0.8385333
# Bayesian correlated t-test
# NOTE(review): all three results print as NA. The input is a single accuracy
# difference, so presumably there is no variance to estimate -- confirm against
# the definition of correlatedBayesianTtest.
bct_tda_pca_5.60.5_lr.n1_test <- correlatedBayesianTtest(
  as.matrix(diff_tda_pca_5.60.5_lr.n1_test), 0.1, -0.01, 0.01
)
bct_tda_pca_5.60.5_lr.n1_test
## $left
## [1] NA
##
## $rope
## [1] NA
##
## $right
## [1] NA
# ROPE plot (disabled)
# plot(rope(diff_tda_pca_5.60.5_lr.n1_test))
# Bayes factor (disabled)
# bf_tda_pca_5.60.5_lr.n1_test = ttestBF(x = as.matrix(diff_tda_pca_5.60.5_lr.n1_test))
# bf_tda_pca_5.60.5_lr.n1_test
# t-test (disabled)
# t.test(as.matrix(diff_tda_pca_5.60.5_lr.n1_test))
## With TDA PCA filter 5 intervals, 50% overlap, 5 bins
## NOTE(review): the object names read "5.60.5" -- confirm whether the overlap
## is 50% or 60%.
## Node 2: logistic regression (glm, binomial family) fitted through caret's
## train(), with resampling as configured in fitControl.
Adult_TDA_PC_5.60.5_n2_LrFit0 <- train(
  as.factor(adult_df1) ~ .,
  data = tda.m_adult_5.60.5.n2.vec,
  method = "glm",
  family = "binomial",
  metric = "Accuracy",
  trControl = fitControl
)
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning in predict.lm(object, newdata, se.fit, scale = 1, type = if (type == :
## prediction from rank-deficient fit; attr(*, "non-estim") has doubtful cases
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning in predict.lm(object, newdata, se.fit, scale = 1, type = if (type == :
## prediction from rank-deficient fit; attr(*, "non-estim") has doubtful cases
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning in predict.lm(object, newdata, se.fit, scale = 1, type = if (type == :
## prediction from rank-deficient fit; attr(*, "non-estim") has doubtful cases
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
Adult_TDA_PC_5.60.5_n2_LrFit0
## Generalized Linear Model
##
## 13933 samples
## 108 predictor
## 2 classes: ' <=50K', ' >50K'
##
## No pre-processing
## Resampling: Cross-Validated (3 fold)
## Summary of sample sizes: 9289, 9288, 9289
## Resampling results:
##
## Accuracy Kappa
## 0.7417642 0.4839514
Adult_TDA_PC_5.60.5_n2_LrFit0$resample
## Accuracy Kappa Resample
## 1 0.7444014 0.4891124 Fold1
## 2 0.7405813 0.4814669 Fold2
## 3 0.7403101 0.4812748 Fold3
# Keep the per-fold accuracies of the node-2 model as a one-column data frame
# ("Accuracy" is the first column of $resample).
ad_tda_pc_5.60.5_n2_lr_fit_re <- Adult_TDA_PC_5.60.5_n2_LrFit0$resample["Accuracy"]
summary(Adult_TDA_PC_5.60.5_n2_LrFit0)
##
## Call:
## NULL
##
## Coefficients: (11 not defined because of singularities)
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -2.540e+12 7.197e+12 -0.353 0.724140
## V1 1.791e-02 1.932e-03 9.270 < 2e-16 ***
## V2.. 2.540e+12 7.197e+12 0.353 0.724140
## V2.Federal.gov 2.540e+12 7.197e+12 0.353 0.724140
## V2.Local.gov 2.540e+12 7.197e+12 0.353 0.724140
## V2.Never.worked NA NA NA NA
## V2.Private 2.540e+12 7.197e+12 0.353 0.724140
## V2.Self.emp.inc 2.540e+12 7.197e+12 0.353 0.724140
## V2.Self.emp.not.inc 2.540e+12 7.197e+12 0.353 0.724140
## V2.State.gov 2.540e+12 7.197e+12 0.353 0.724140
## V2.Without.pay 2.540e+12 7.197e+12 0.353 0.724140
## V3 8.402e-07 2.057e-07 4.084 4.42e-05 ***
## V4.10th -9.815e-01 1.687e-01 -5.817 6.00e-09 ***
## V4.11th -1.205e+00 1.842e-01 -6.541 6.12e-11 ***
## V4.12th -4.500e-01 2.637e-01 -1.706 0.087949 .
## V4.1st.4th -9.174e-01 5.030e-01 -1.824 0.068184 .
## V4.5th.6th -1.100e+00 3.354e-01 -3.280 0.001040 **
## V4.7th.8th -1.545e+00 1.960e-01 -7.882 3.22e-15 ***
## V4.9th -1.463e+00 2.653e-01 -5.514 3.50e-08 ***
## V4.Assoc.acdm 1.239e-01 1.181e-01 1.049 0.294054
## V4.Assoc.voc 8.890e-02 9.924e-02 0.896 0.370377
## V4.Bachelors 6.616e-01 6.597e-02 10.028 < 2e-16 ***
## V4.Doctorate 1.196e+00 1.798e-01 6.649 2.96e-11 ***
## V4.HS.grad -3.164e-01 5.747e-02 -5.506 3.68e-08 ***
## V4.Masters 9.190e-01 9.735e-02 9.441 < 2e-16 ***
## V4.Preschool -2.569e+01 2.474e+05 0.000 0.999917
## V4.Prof.school 1.122e+00 1.570e-01 7.144 9.08e-13 ***
## V4.Some.college NA NA NA NA
## V5 NA NA NA NA
## V6.Divorced -4.346e-01 5.937e-01 -0.732 0.464162
## V6.Married.AF.spouse 6.577e-02 1.223e+00 0.054 0.957114
## V6.Married.civ.spouse -1.116e+00 7.637e-01 -1.461 0.144052
## V6.Married.spouse.absent 1.377e-01 8.402e-01 0.164 0.869811
## V6.Never.married -1.517e-02 6.158e-01 -0.025 0.980343
## V6.Separated 4.035e-01 8.480e-01 0.476 0.634205
## V6.Widowed NA NA NA NA
## V7.. NA NA NA NA
## V7.Adm.clerical 2.935e-01 1.216e-01 2.414 0.015793 *
## V7.Armed.Forces -5.696e-01 1.810e+00 -0.315 0.753045
## V7.Craft.repair 1.675e-01 8.960e-02 1.869 0.061633 .
## V7.Exec.managerial 9.282e-01 9.514e-02 9.756 < 2e-16 ***
## V7.Farming.fishing -6.902e-01 1.476e-01 -4.677 2.92e-06 ***
## V7.Handlers.cleaners -4.607e-01 1.592e-01 -2.895 0.003796 **
## V7.Machine.op.inspct -4.459e-02 1.144e-01 -0.390 0.696749
## V7.Other.service -5.924e-01 1.597e-01 -3.710 0.000207 ***
## V7.Priv.house.serv -2.805e+01 3.545e+05 0.000 0.999937
## V7.Prof.specialty 6.286e-01 1.042e-01 6.032 1.62e-09 ***
## V7.Protective.serv 6.409e-01 1.431e-01 4.477 7.56e-06 ***
## V7.Sales 4.782e-01 9.792e-02 4.884 1.04e-06 ***
## V7.Tech.support 9.256e-01 1.408e-01 6.573 4.92e-11 ***
## V7.Transport.moving NA NA NA NA
## V8.Husband -1.011e+00 7.981e-01 -1.267 0.205243
## V8.Not.in.family -6.959e-01 9.340e-01 -0.745 0.456243
## V8.Other.relative -9.407e-01 8.818e-01 -1.067 0.286043
## V8.Own.child -4.056e-01 9.281e-01 -0.437 0.662111
## V8.Unmarried -1.216e-02 1.019e+00 -0.012 0.990480
## V8.Wife NA NA NA NA
## V9.Amer.Indian.Eskimo -8.232e-01 2.960e-01 -2.781 0.005419 **
## V9.Asian.Pac.Islander 2.100e-01 1.929e-01 1.089 0.276199
## V9.Black 3.402e-01 1.097e-01 3.102 0.001919 **
## V9.Other -2.035e-01 3.513e-01 -0.579 0.562422
## V9.White NA NA NA NA
## V10.Female 1.919e+00 7.830e-01 2.452 0.014226 *
## V10.Male NA NA NA NA
## V11 2.816e-04 1.363e-05 20.664 < 2e-16 ***
## V12 6.165e-04 4.545e-05 13.565 < 2e-16 ***
## V13 2.174e-02 1.928e-03 11.278 < 2e-16 ***
## V14.. -5.315e-01 7.048e-01 -0.754 0.450726
## V14.Cambodia 1.620e+00 1.122e+00 1.444 0.148852
## V14.Canada 1.161e-01 7.534e-01 0.154 0.877581
## V14.China -1.211e+00 8.141e-01 -1.487 0.136948
## V14.Columbia -2.552e+00 1.135e+00 -2.248 0.024594 *
## V14.Cuba 3.775e-01 7.819e-01 0.483 0.629208
## V14.Dominican.Republic -2.582e+01 1.127e+05 0.000 0.999817
## V14.Ecuador -4.815e-01 1.013e+00 -0.475 0.634688
## V14.El.Salvador -8.691e-01 8.788e-01 -0.989 0.322668
## V14.England 1.384e-01 7.961e-01 0.174 0.862013
## V14.France -3.852e-01 9.261e-01 -0.416 0.677463
## V14.Germany 2.855e-01 7.514e-01 0.380 0.703941
## V14.Greece -1.523e+00 9.397e-01 -1.621 0.105050
## V14.Guatemala -2.287e+00 1.594e+00 -1.435 0.151394
## V14.Haiti -2.895e-01 1.143e+00 -0.253 0.800133
## V14.Holand.Netherlands NA NA NA NA
## V14.Honduras 2.105e+01 3.648e+05 0.000 0.999954
## V14.Hong -3.825e-01 1.026e+00 -0.373 0.709288
## V14.Hungary -3.069e-01 1.203e+00 -0.255 0.798705
## V14.India -9.404e-01 7.647e-01 -1.230 0.218780
## V14.Iran -9.125e-02 8.493e-01 -0.107 0.914433
## V14.Ireland 5.973e-01 1.140e+00 0.524 0.600442
## V14.Italy 1.792e-01 7.733e-01 0.232 0.816739
## V14.Jamaica 1.145e-01 9.339e-01 0.123 0.902383
## V14.Japan -5.800e-01 8.255e-01 -0.703 0.482321
## V14.Laos -1.330e+00 1.409e+00 -0.944 0.345416
## V14.Mexico -6.616e-01 7.311e-01 -0.905 0.365485
## V14.Nicaragua -1.569e+00 1.175e+00 -1.336 0.181675
## V14.Outlying.US.Guam.USVI.etc. -2.521e+01 3.035e+05 0.000 0.999934
## V14.Peru -9.033e-01 1.138e+00 -0.794 0.427428
## V14.Philippines 3.706e-01 7.670e-01 0.483 0.629029
## V14.Poland -2.732e-01 8.224e-01 -0.332 0.739751
## V14.Portugal -7.221e-01 1.082e+00 -0.668 0.504426
## V14.Puerto.Rico -8.384e-01 8.555e-01 -0.980 0.327096
## V14.Scotland 6.170e-01 1.419e+00 0.435 0.663727
## V14.South -1.078e+00 8.462e-01 -1.274 0.202666
## V14.Taiwan -6.791e-01 8.524e-01 -0.797 0.425635
## V14.Thailand -2.801e-01 1.292e+00 -0.217 0.828354
## V14.Trinadad.Tobago 1.411e+00 1.628e+00 0.867 0.386101
## V14.United.States -1.153e-01 6.871e-01 -0.168 0.866787
## V14.Vietnam -2.121e+00 1.064e+00 -1.994 0.046145 *
## V14.Yugoslavia NA NA NA NA
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 19311 on 13932 degrees of freedom
## Residual deviance: 14308 on 13835 degrees of freedom
## AIC: 14504
##
## Number of Fisher Scoring iterations: 25
# Variable-importance plot for the node-2 TDA-assisted LR model, showing up to
# 50 predictors.
vip(Adult_TDA_PC_5.60.5_n2_LrFit0, num_features = 50) +
  ggtitle("Adult_TDA_PCA_5.60.5_n2_Lr1Fit TDA-Assisted LR")

# Predict the outcome for the test data with the node-2 model
# (Adult_TDA_PC_5.60.5_n2_LrFit0).
pred0 <- predict(
  Adult_TDA_PC_5.60.5_n2_LrFit0,
  newdata = adult.one_hot_df4Test
)
## Warning in predict.lm(object, newdata, se.fit, scale = 1, type = if (type == :
## prediction from rank-deficient fit; attr(*, "non-estim") has doubtful cases
# Confusion matrix of the node-2 predictions against the observed test labels,
# used to assess model performance on the test data.
ad_tda_pc_5.60.5_n2_lr_cf0 <- confusionMatrix(
  data = pred0,
  reference = as.factor(adult.one_hot_df4Test$adult_df1)
)
ad_tda_pc_5.60.5_n2_lr_cf0
## Confusion Matrix and Statistics
##
## Reference
## Prediction <=50K >50K
## <=50K 2775 623
## >50K 4641 1729
##
## Accuracy : 0.4611
## 95% CI : (0.4512, 0.471)
## No Information Rate : 0.7592
## P-Value [Acc > NIR] : 1
##
## Kappa : 0.069
##
## Mcnemar's Test P-Value : <2e-16
##
## Sensitivity : 0.3742
## Specificity : 0.7351
## Pos Pred Value : 0.8167
## Neg Pred Value : 0.2714
## Prevalence : 0.7592
## Detection Rate : 0.2841
## Detection Prevalence : 0.3479
## Balanced Accuracy : 0.5547
##
## 'Positive' Class : <=50K
##
ad_tda_pc_5.60.5_n2_lr_cf0
## Confusion Matrix and Statistics
##
## Reference
## Prediction <=50K >50K
## <=50K 2775 623
## >50K 4641 1729
##
## Accuracy : 0.4611
## 95% CI : (0.4512, 0.471)
## No Information Rate : 0.7592
## P-Value [Acc > NIR] : 1
##
## Kappa : 0.069
##
## Mcnemar's Test P-Value : <2e-16
##
## Sensitivity : 0.3742
## Specificity : 0.7351
## Pos Pred Value : 0.8167
## Neg Pred Value : 0.2714
## Prevalence : 0.7592
## Detection Rate : 0.2841
## Detection Prevalence : 0.3479
## Balanced Accuracy : 0.5547
##
## 'Positive' Class : <=50K
##
ad_tda_pc_5.60.5_n2_lr_cf0$overall
## Accuracy Kappa AccuracyLower AccuracyUpper AccuracyNull
## 0.46109746 0.06904083 0.45117366 0.47104447 0.75921376
## AccuracyPValue McnemarPValue
## 1.00000000 0.00000000
# Overall test-set accuracy of the node-2 model (first element of $overall).
ad_tda_pc_5.60.5_n2_lr_cf0_ov_acc <- ad_tda_pc_5.60.5_n2_lr_cf0$overall["Accuracy"]
ad_tda_pc_5.60.5_n2_lr_cf0$byClass
## Sensitivity Specificity Pos Pred Value
## 0.3741909 0.7351190 0.8166569
## Neg Pred Value Precision Recall
## 0.2714286 0.8166569 0.3741909
## F1 Prevalence Detection Rate
## 0.5132236 0.7592138 0.2840909
## Detection Prevalence Balanced Accuracy
## 0.3478706 0.5546550
# Precision, recall and F1 of the node-2 model (elements 5 to 7 of $byClass).
ad_tda_pc_5.60.5_n2_lr_cf0_pre_rec_f1 <-
  ad_tda_pc_5.60.5_n2_lr_cf0$byClass[c("Precision", "Recall", "F1")]
###### Initial Bayesian tests: non-TDA-assisted LR vs. TDA-assisted LR (node 2)
### Per-fold accuracy difference (3-fold): ad_lr_fit_re minus the node-2 model
diff_tda_pca_5.60.5_lr_n2_3_fold <- ad_lr_fit_re - ad_tda_pc_5.60.5_n2_lr_fit_re
diff_tda_pca_5.60.5_lr_n2_3_fold
## Accuracy
## 1 0.1052959
## 2 0.1076483
## 3 0.1150466
## Bayesian tests on the 3-fold differences
## (-0.01 and 0.01 are presumably the ROPE limits, as in the rope() call below)
# Bayesian sign test
bst_tda_pca_5.60.5_lr.n2_3_fold <- BayesianSignTest(
  as.matrix(diff_tda_pca_5.60.5_lr_n2_3_fold), -0.01, 0.01
)
bst_tda_pca_5.60.5_lr.n2_3_fold
## $probLeft
## [1] 0
##
## $probRope
## [1] 0.25
##
## $probRight
## [1] 0.75
# Odds of "left" vs. "right" from the Bayesian sign test
bst_tda_pca_5.60.5_lr.n2_3_fold_odds.left <-
  bst_tda_pca_5.60.5_lr.n2_3_fold$probLeft / bst_tda_pca_5.60.5_lr.n2_3_fold$probRight
bst_tda_pca_5.60.5_lr.n2_3_fold_odds.left
## [1] 0
# Bayesian signed-rank test
bsr_tda_pca_5.60.5_lr.n2_3_fold <- BayesianSignedRank(
  as.matrix(diff_tda_pca_5.60.5_lr_n2_3_fold), -0.01, 0.01
)
bsr_tda_pca_5.60.5_lr.n2_3_fold
## $winLeft
## [1] 0
##
## $winRope
## [1] 0.0092
##
## $winRight
## [1] 0.9908
# Bayesian correlated t-test
# NOTE(review): the second argument (0.1) is presumably the correlation rho;
# with 3-fold resampling 1/3 would be the usual choice -- confirm against the
# definition of correlatedBayesianTtest.
bct_tda_pca_5.60.5_lr.n2_3_fold <- correlatedBayesianTtest(
  as.matrix(diff_tda_pca_5.60.5_lr_n2_3_fold), 0.1, -0.01, 0.01
)
bct_tda_pca_5.60.5_lr.n2_3_fold
## $left
## [1] 0.0004035568
##
## $rope
## [1] 0.0001785595
##
## $right
## [1] 0.9994179
# ROPE plot for the range [-0.01, 0.01]
plot(rope(diff_tda_pca_5.60.5_lr_n2_3_fold, c(-0.01, 0.01)))

# Bayes factor (disabled)
# bf_tda_pca_5.60.5_lr.n2_3_fold = ttestBF(x = as.matrix(diff_tda_pca_5.60.5_lr_n2_3_fold))
# bf_tda_pca_5.60.5_lr.n2_3_fold
# One-sample t-test on the fold differences
t.test(as.matrix(diff_tda_pca_5.60.5_lr_n2_3_fold))
##
## One Sample t-test
##
## data: as.matrix(diff_tda_pca_5.60.5_lr_n2_3_fold)
## t = 37.216, df = 2, p-value = 0.0007212
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
## 0.09669024 0.12197029
## sample estimates:
## mean of x
## 0.1093303
### Test-set accuracy difference: lr_cf_ov_acc minus the node-2 model accuracy
diff_tda_pca_5.60.5_lr.n2_test <- lr_cf_ov_acc - ad_tda_pc_5.60.5_n2_lr_cf0_ov_acc
diff_tda_pca_5.60.5_lr.n2_test
## Accuracy
## 0.392199
## Bayesian tests on the test-set difference (a single value)
# Bayesian sign test
bst_tda_pca_5.60.5_lr.n2_test <- BayesianSignTest(
  as.matrix(diff_tda_pca_5.60.5_lr.n2_test), -0.01, 0.01
)
bst_tda_pca_5.60.5_lr.n2_test
## $probLeft
## [1] 0
##
## $probRope
## [1] 0.5
##
## $probRight
## [1] 0.5
# Odds of "left" vs. "right" from the Bayesian sign test
bst_tda_pca_5.60.5_lr.n2_test_odds.left <-
  bst_tda_pca_5.60.5_lr.n2_test$probLeft / bst_tda_pca_5.60.5_lr.n2_test$probRight
bst_tda_pca_5.60.5_lr.n2_test_odds.left
## [1] 0
# Bayesian signed-rank test
bsr_tda_pca_5.60.5_lr.n2_test <- BayesianSignedRank(
  as.matrix(diff_tda_pca_5.60.5_lr.n2_test), -0.01, 0.01
)
bsr_tda_pca_5.60.5_lr.n2_test
## $winLeft
## [1] 0
##
## $winRope
## [1] 0.1571667
##
## $winRight
## [1] 0.8428333
# Bayesian correlated t-test
# NOTE(review): all three results print as NA. The input is a single accuracy
# difference, so presumably there is no variance to estimate -- confirm against
# the definition of correlatedBayesianTtest.
bct_tda_pca_5.60.5_lr.n2_test <- correlatedBayesianTtest(
  as.matrix(diff_tda_pca_5.60.5_lr.n2_test), 0.1, -0.01, 0.01
)
bct_tda_pca_5.60.5_lr.n2_test
## $left
## [1] NA
##
## $rope
## [1] NA
##
## $right
## [1] NA
# ROPE plot (disabled)
# plot(rope(diff_tda_pca_5.60.5_lr.n2_test))
# Bayes factor (disabled)
# bf_tda_pca_5.60.5_lr.n2_test = ttestBF(x = as.matrix(diff_tda_pca_5.60.5_lr.n2_test))
# bf_tda_pca_5.60.5_lr.n2_test
# t-test (disabled)
# t.test(as.matrix(diff_tda_pca_5.60.5_lr.n2_test))
## Node 3: logistic regression (glm, binomial family) fitted through caret's
## train(), with resampling as configured in fitControl.
Adult_TDA_PC_5.60.5_n3_LrFit0 <- train(
  as.factor(adult_df1) ~ .,
  data = tda.m_adult_5.60.5.n3.vec,
  method = "glm",
  family = "binomial",
  metric = "Accuracy",
  trControl = fitControl
)
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning in predict.lm(object, newdata, se.fit, scale = 1, type = if (type == :
## prediction from rank-deficient fit; attr(*, "non-estim") has doubtful cases
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning in predict.lm(object, newdata, se.fit, scale = 1, type = if (type == :
## prediction from rank-deficient fit; attr(*, "non-estim") has doubtful cases
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning in predict.lm(object, newdata, se.fit, scale = 1, type = if (type == :
## prediction from rank-deficient fit; attr(*, "non-estim") has doubtful cases
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
Adult_TDA_PC_5.60.5_n3_LrFit0
## Generalized Linear Model
##
## 15744 samples
## 108 predictor
## 2 classes: ' <=50K', ' >50K'
##
## No pre-processing
## Resampling: Cross-Validated (3 fold)
## Summary of sample sizes: 10496, 10496, 10496
## Resampling results:
##
## Accuracy Kappa
## 0.7858867 0.317527
Adult_TDA_PC_5.60.5_n3_LrFit0$resample
## Accuracy Kappa Resample
## 1 0.7877287 0.3279498 Fold1
## 2 0.7888720 0.3253192 Fold2
## 3 0.7810595 0.2993121 Fold3
# Per-fold accuracies of the node-3 model (first column of $resample).
# They must be read from the n3 fit: reading them from
# Adult_TDA_PC_5.60.5_n2_LrFit0 makes every node-3 fold-wise comparison a
# repeat of the node-2 comparison.
ad_tda_pc_5.60.5_n3_lr_fit_re <- Adult_TDA_PC_5.60.5_n3_LrFit0$resample[1]
summary(Adult_TDA_PC_5.60.5_n3_LrFit0)
##
## Call:
## NULL
##
## Coefficients: (11 not defined because of singularities)
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 4.582e+12 4.340e+12 1.056 0.291058
## V1 1.392e-02 1.923e-03 7.241 4.46e-13 ***
## V2.. -4.582e+12 4.340e+12 -1.056 0.291058
## V2.Federal.gov -4.582e+12 4.340e+12 -1.056 0.291058
## V2.Local.gov -4.582e+12 4.340e+12 -1.056 0.291058
## V2.Never.worked NA NA NA NA
## V2.Private -4.582e+12 4.340e+12 -1.056 0.291058
## V2.Self.emp.inc -4.582e+12 4.340e+12 -1.056 0.291058
## V2.Self.emp.not.inc -4.582e+12 4.340e+12 -1.056 0.291058
## V2.State.gov -4.582e+12 4.340e+12 -1.056 0.291058
## V2.Without.pay -4.582e+12 4.340e+12 -1.056 0.291058
## V3 9.048e-07 2.000e-07 4.524 6.06e-06 ***
## V4.10th -7.706e-01 1.584e-01 -4.866 1.14e-06 ***
## V4.11th -8.344e-01 1.631e-01 -5.116 3.12e-07 ***
## V4.12th -5.064e-01 2.415e-01 -2.097 0.035970 *
## V4.1st.4th -1.371e+00 4.832e-01 -2.837 0.004552 **
## V4.5th.6th -1.167e+00 3.041e-01 -3.839 0.000124 ***
## V4.7th.8th -1.599e+00 2.160e-01 -7.405 1.31e-13 ***
## V4.9th -1.214e+00 2.309e-01 -5.259 1.45e-07 ***
## V4.Assoc.acdm -3.880e-01 1.180e-01 -3.288 0.001008 **
## V4.Assoc.voc -2.549e-01 1.016e-01 -2.509 0.012092 *
## V4.Bachelors -3.628e-01 7.009e-02 -5.176 2.26e-07 ***
## V4.Doctorate 4.003e-01 1.956e-01 2.047 0.040671 *
## V4.HS.grad -3.999e-01 5.627e-02 -7.106 1.20e-12 ***
## V4.Masters -3.452e-01 1.053e-01 -3.280 0.001040 **
## V4.Preschool -3.223e+01 6.791e+04 0.000 0.999621
## V4.Prof.school 1.446e-01 1.808e-01 0.800 0.423960
## V4.Some.college NA NA NA NA
## V5 NA NA NA NA
## V6.Divorced -3.611e-01 1.766e-01 -2.045 0.040832 *
## V6.Married.AF.spouse 2.217e+00 6.919e-01 3.203 0.001358 **
## V6.Married.civ.spouse 7.560e-01 3.157e-01 2.394 0.016651 *
## V6.Married.spouse.absent -2.923e-01 2.872e-01 -1.018 0.308726
## V6.Never.married -3.957e-01 1.838e-01 -2.153 0.031352 *
## V6.Separated -2.464e-01 2.360e-01 -1.044 0.296434
## V6.Widowed NA NA NA NA
## V7.. NA NA NA NA
## V7.Adm.clerical 5.636e-01 1.104e-01 5.106 3.29e-07 ***
## V7.Armed.Forces -2.483e+01 1.748e+05 0.000 0.999887
## V7.Craft.repair 1.507e-01 9.271e-02 1.626 0.103949
## V7.Exec.managerial 3.818e-01 1.012e-01 3.773 0.000161 ***
## V7.Farming.fishing -1.306e+00 2.163e-01 -6.039 1.55e-09 ***
## V7.Handlers.cleaners -3.150e-01 1.519e-01 -2.075 0.038024 *
## V7.Machine.op.inspct 4.006e-03 1.124e-01 0.036 0.971569
## V7.Other.service -2.905e-01 1.342e-01 -2.165 0.030399 *
## V7.Priv.house.serv -3.803e+00 3.995e+00 -0.952 0.341151
## V7.Prof.specialty 2.737e-01 1.085e-01 2.522 0.011679 *
## V7.Protective.serv 3.327e-01 1.581e-01 2.104 0.035376 *
## V7.Sales 4.347e-01 1.007e-01 4.317 1.58e-05 ***
## V7.Tech.support 9.375e-01 1.325e-01 7.073 1.51e-12 ***
## V7.Transport.moving NA NA NA NA
## V8.Husband -3.633e-01 1.164e-01 -3.122 0.001799 **
## V8.Not.in.family 2.668e-01 2.831e-01 0.942 0.346048
## V8.Other.relative -4.646e-01 2.622e-01 -1.772 0.076466 .
## V8.Own.child -5.271e-01 2.791e-01 -1.889 0.058932 .
## V8.Unmarried 3.425e-01 2.960e-01 1.157 0.247317
## V8.Wife NA NA NA NA
## V9.Amer.Indian.Eskimo -1.648e-02 2.434e-01 -0.068 0.946019
## V9.Asian.Pac.Islander 4.562e-01 1.772e-01 2.574 0.010039 *
## V9.Black 5.699e-01 8.518e-02 6.691 2.22e-11 ***
## V9.Other 2.357e-01 2.941e-01 0.801 0.422909
## V9.White NA NA NA NA
## V10.Female 1.112e+00 9.682e-02 11.487 < 2e-16 ***
## V10.Male NA NA NA NA
## V11 3.058e-04 1.211e-05 25.244 < 2e-16 ***
## V12 3.456e-04 4.594e-05 7.523 5.33e-14 ***
## V13 1.414e-02 1.951e-03 7.247 4.26e-13 ***
## V14.. -6.466e-01 8.317e-01 -0.777 0.436930
## V14.Cambodia 6.247e-01 1.027e+00 0.608 0.543006
## V14.Canada -4.614e-01 8.800e-01 -0.524 0.600107
## V14.China -1.712e+00 9.647e-01 -1.775 0.075884 .
## V14.Columbia -2.332e+00 1.317e+00 -1.771 0.076622 .
## V14.Cuba -7.632e-02 8.938e-01 -0.085 0.931951
## V14.Dominican.Republic -1.770e+00 1.324e+00 -1.337 0.181342
## V14.Ecuador -6.519e-01 1.128e+00 -0.578 0.563420
## V14.El.Salvador -1.134e+00 1.011e+00 -1.122 0.262008
## V14.England -1.952e-01 8.959e-01 -0.218 0.827536
## V14.France -3.075e-02 1.037e+00 -0.030 0.976332
## V14.Germany 7.704e-03 8.649e-01 0.009 0.992892
## V14.Greece -1.755e+00 1.165e+00 -1.506 0.132101
## V14.Guatemala -5.952e-01 1.108e+00 -0.537 0.591125
## V14.Haiti -1.119e-01 1.078e+00 -0.104 0.917297
## V14.Holand.Netherlands NA NA NA NA
## V14.Honduras 2.470e+01 3.840e+05 0.000 0.999949
## V14.Hong -3.123e-01 1.108e+00 -0.282 0.778044
## V14.Hungary -1.493e+00 1.391e+00 -1.074 0.282939
## V14.India -9.342e-01 9.080e-01 -1.029 0.303560
## V14.Iran -1.073e+00 1.040e+00 -1.032 0.302243
## V14.Ireland -3.300e-01 1.074e+00 -0.307 0.758710
## V14.Italy 7.604e-02 8.998e-01 0.085 0.932655
## V14.Jamaica -1.725e-01 9.408e-01 -0.183 0.854512
## V14.Japan -2.401e-01 9.347e-01 -0.257 0.797289
## V14.Laos -1.481e+00 1.382e+00 -1.071 0.284098
## V14.Mexico -1.074e+00 8.492e-01 -1.265 0.205822
## V14.Nicaragua -1.131e+00 1.139e+00 -0.994 0.320430
## V14.Outlying.US.Guam.USVI.etc. -2.574e+01 1.476e+05 0.000 0.999861
## V14.Peru -1.406e+00 1.363e+00 -1.031 0.302445
## V14.Philippines -3.709e-02 8.643e-01 -0.043 0.965766
## V14.Poland -5.510e-01 9.317e-01 -0.591 0.554294
## V14.Portugal -7.682e-01 1.128e+00 -0.681 0.495841
## V14.Puerto.Rico -6.534e-01 9.283e-01 -0.704 0.481490
## V14.Scotland -6.511e-01 1.263e+00 -0.516 0.606184
## V14.South -1.038e+00 9.463e-01 -1.097 0.272857
## V14.Taiwan -4.993e-01 9.980e-01 -0.500 0.616876
## V14.Thailand -1.574e+00 1.388e+00 -1.134 0.256650
## V14.Trinadad.Tobago -5.416e-01 1.212e+00 -0.447 0.654967
## V14.United.States -3.648e-01 8.159e-01 -0.447 0.654793
## V14.Vietnam -1.364e+00 1.018e+00 -1.341 0.180068
## V14.Yugoslavia NA NA NA NA
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 17906 on 15743 degrees of freedom
## Residual deviance: 14521 on 15646 degrees of freedom
## AIC: 14717
##
## Number of Fisher Scoring iterations: 25
# Variable-importance plot for the node-3 TDA-assisted LR model, showing up to
# 50 predictors.
vip(Adult_TDA_PC_5.60.5_n3_LrFit0, num_features = 50) +
  ggtitle("Adult_TDA_PCA_5.60.5_n3_Lr1Fit TDA-Assisted LR")

# Predict the outcome for the test data with the node-3 model
# (Adult_TDA_PC_5.60.5_n3_LrFit0).
pred0 <- predict(
  Adult_TDA_PC_5.60.5_n3_LrFit0,
  newdata = adult.one_hot_df4Test
)
## Warning in predict.lm(object, newdata, se.fit, scale = 1, type = if (type == :
## prediction from rank-deficient fit; attr(*, "non-estim") has doubtful cases
# Confusion matrix of the node-3 predictions against the observed test labels,
# used to assess model performance on the test data.
ad_tda_pc_5.60.5_n3_lr_cf0 <- confusionMatrix(
  data = pred0,
  reference = as.factor(adult.one_hot_df4Test$adult_df1)
)
ad_tda_pc_5.60.5_n3_lr_cf0
## Confusion Matrix and Statistics
##
## Reference
## Prediction <=50K >50K
## <=50K 6827 1701
## >50K 589 651
##
## Accuracy : 0.7656
## 95% CI : (0.757, 0.7739)
## No Information Rate : 0.7592
## P-Value [Acc > NIR] : 0.07246
##
## Kappa : 0.2354
##
## Mcnemar's Test P-Value : < 2e-16
##
## Sensitivity : 0.9206
## Specificity : 0.2768
## Pos Pred Value : 0.8005
## Neg Pred Value : 0.5250
## Prevalence : 0.7592
## Detection Rate : 0.6989
## Detection Prevalence : 0.8731
## Balanced Accuracy : 0.5987
##
## 'Positive' Class : <=50K
##
ad_tda_pc_5.60.5_n3_lr_cf0
## Confusion Matrix and Statistics
##
## Reference
## Prediction <=50K >50K
## <=50K 6827 1701
## >50K 589 651
##
## Accuracy : 0.7656
## 95% CI : (0.757, 0.7739)
## No Information Rate : 0.7592
## P-Value [Acc > NIR] : 0.07246
##
## Kappa : 0.2354
##
## Mcnemar's Test P-Value : < 2e-16
##
## Sensitivity : 0.9206
## Specificity : 0.2768
## Pos Pred Value : 0.8005
## Neg Pred Value : 0.5250
## Prevalence : 0.7592
## Detection Rate : 0.6989
## Detection Prevalence : 0.8731
## Balanced Accuracy : 0.5987
##
## 'Positive' Class : <=50K
##
ad_tda_pc_5.60.5_n3_lr_cf0$overall
## Accuracy Kappa AccuracyLower AccuracyUpper AccuracyNull
## 7.655610e-01 2.353541e-01 7.570300e-01 7.739334e-01 7.592138e-01
## AccuracyPValue McnemarPValue
## 7.246161e-02 3.104101e-119
# Overall test-set accuracy of the node-3 model (first element of $overall).
ad_tda_pc_5.60.5_n3_lr_cf0_ov_acc <- ad_tda_pc_5.60.5_n3_lr_cf0$overall["Accuracy"]
ad_tda_pc_5.60.5_n3_lr_cf0$byClass
## Sensitivity Specificity Pos Pred Value
## 0.9205771 0.2767857 0.8005394
## Neg Pred Value Precision Recall
## 0.5250000 0.8005394 0.9205771
## F1 Prevalence Detection Rate
## 0.8563723 0.7592138 0.6989148
## Detection Prevalence Balanced Accuracy
## 0.8730549 0.5986814
# Precision, recall and F1 of the node-3 model (elements 5 to 7 of $byClass).
ad_tda_pc_5.60.5_n3_lr_cf0_pre_rec_f1 <-
  ad_tda_pc_5.60.5_n3_lr_cf0$byClass[c("Precision", "Recall", "F1")]
###### Conduct initial Bayesian tests of non-tda-assisted LR vs. tda-assisted LR classifiers
### 3-fold diff
diff_tda_pca_5.60.5_lr_n3_3_fold<-(ad_lr_fit_re - ad_tda_pc_5.60.5_n3_lr_fit_re)
diff_tda_pca_5.60.5_lr_n3_3_fold
## Accuracy
## 1 0.1052959
## 2 0.1076483
## 3 0.1150466
## Bayesian Tests 3-fold diff
# Bayesian Sign Test
bst_tda_pca_5.60.5_lr.n3_3_fold<-BayesianSignTest(as.matrix(diff_tda_pca_5.60.5_lr_n3_3_fold),-0.01,0.01)
bst_tda_pca_5.60.5_lr.n3_3_fold
## $probLeft
## [1] 0
##
## $probRope
## [1] 0.25
##
## $probRight
## [1] 0.75
# Odds Left Bayesian Sign Test
bst_tda_pca_5.60.5_lr.n3_3_fold_odds.left<-bst_tda_pca_5.60.5_lr.n3_3_fold$probLeft/bst_tda_pca_5.60.5_lr.n3_3_fold$probRight
bst_tda_pca_5.60.5_lr.n3_3_fold_odds.left
## [1] 0
# Bayesian Signed Rank Test
bsr_tda_pca_5.60.5_lr.n3_3_fold<-BayesianSignedRank(as.matrix(diff_tda_pca_5.60.5_lr_n3_3_fold),-0.01,0.01)
bsr_tda_pca_5.60.5_lr.n3_3_fold
## $winLeft
## [1] 0
##
## $winRope
## [1] 0.008533333
##
## $winRight
## [1] 0.9914667
# Bayesian Correlated Test
bct_tda_pca_5.60.5_lr.n3_3_fold<-correlatedBayesianTtest(as.matrix(diff_tda_pca_5.60.5_lr_n3_3_fold),0.1,-0.01,0.01)
bct_tda_pca_5.60.5_lr.n3_3_fold
## $left
## [1] 0.0004035568
##
## $rope
## [1] 0.0001785595
##
## $right
## [1] 0.9994179
# Rope Plot
plot(rope(diff_tda_pca_5.60.5_lr_n3_3_fold,c(-0.01,0.01)))

#BayesFactor
#bf_tda_pca_5.60.5_lr.n3_3_fold = ttestBF(x = as.matrix(diff_tda_pca_5.60.5_lr_n3_3_fold))
#bf_tda_pca_5.60.5_lr.n3_3_fold
# One-sample t-test of the fold-wise differences against a mean of zero
t.test(x = as.matrix(diff_tda_pca_5.60.5_lr_n3_3_fold))
##
## One Sample t-test
##
## data: as.matrix(diff_tda_pca_5.60.5_lr_n3_3_fold)
## t = 37.216, df = 2, p-value = 0.0007212
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
## 0.09669024 0.12197029
## sample estimates:
## mean of x
## 0.1093303
### Test-set accuracy difference (non-TDA LR minus TDA-assisted LR)
diff_tda_pca_5.60.5_lr.n3_test <- lr_cf_ov_acc - ad_tda_pc_5.60.5_n3_lr_cf0_ov_acc
print(diff_tda_pca_5.60.5_lr.n3_test)
## Accuracy
## 0.08773546
## Bayesian tests on the test-set difference
# Bayesian sign test; note the input here is a single difference.
bst_tda_pca_5.60.5_lr.n3_test <- BayesianSignTest(
  as.matrix(diff_tda_pca_5.60.5_lr.n3_test),
  -0.01,
  0.01
)
print(bst_tda_pca_5.60.5_lr.n3_test)
## $probLeft
## [1] 0
##
## $probRope
## [1] 0.5
##
## $probRight
## [1] 0.5
# Sign-test odds: probability of "left" divided by probability of "right"
bst_tda_pca_5.60.5_lr.n3_test_odds.left <- with(
  bst_tda_pca_5.60.5_lr.n3_test,
  probLeft / probRight
)
print(bst_tda_pca_5.60.5_lr.n3_test_odds.left)
## [1] 0
# Bayesian signed-rank test on the test-set difference
bsr_tda_pca_5.60.5_lr.n3_test <- BayesianSignedRank(
  as.matrix(diff_tda_pca_5.60.5_lr.n3_test),
  -0.01,
  0.01
)
print(bsr_tda_pca_5.60.5_lr.n3_test)
## $winLeft
## [1] 0
##
## $winRope
## [1] 0.1600667
##
## $winRight
## [1] 0.8399333
# Correlated Bayesian t-test on the single test-set difference.
# NOTE(review): all three probabilities print as NA below, presumably because
# a variance cannot be estimated from one difference - confirm whether this
# test is meaningful here.
bct_tda_pca_5.60.5_lr.n3_test <- correlatedBayesianTtest(
  as.matrix(diff_tda_pca_5.60.5_lr.n3_test),
  0.1,
  -0.01,
  0.01
)
print(bct_tda_pca_5.60.5_lr.n3_test)
## $left
## [1] NA
##
## $rope
## [1] NA
##
## $right
## [1] NA
# Rope Plot
#plot(rope(diff_tda_pca_5.60.5_lr.n3_test))
#BayesFactor
#bf_tda_pca_5.60.5_lr.n3_test = ttestBF(x = as.matrix(diff_tda_pca_5.60.5_lr.n3_test))
#bf_tda_pca_5.60.5_lr.n3_test
#t_test
#t.test(as.matrix(diff_tda_pca_5.60.5_lr.n3_test))
## Node 4
# Fit a binomial GLM (logistic regression) with train() on the node-4 data,
# resampled according to fitControl and scored on accuracy.
# NOTE(review): the recorded run warns that glm.fit did not converge and that
# predictions come from a rank-deficient fit - read the coefficients with care.
Adult_TDA_PC_5.60.5_n4_LrFit0 <- train(
  as.factor(adult_df1) ~ .,
  data = tda.m_adult_5.60.5.n4.vec,
  method = "glm",
  family = "binomial",
  metric = "Accuracy",
  trControl = fitControl
)
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning in predict.lm(object, newdata, se.fit, scale = 1, type = if (type == :
## prediction from rank-deficient fit; attr(*, "non-estim") has doubtful cases
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning in predict.lm(object, newdata, se.fit, scale = 1, type = if (type == :
## prediction from rank-deficient fit; attr(*, "non-estim") has doubtful cases
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning in predict.lm(object, newdata, se.fit, scale = 1, type = if (type == :
## prediction from rank-deficient fit; attr(*, "non-estim") has doubtful cases
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
# Echo the fitted model with its resampling summary
print(Adult_TDA_PC_5.60.5_n4_LrFit0)
## Generalized Linear Model
##
## 19829 samples
## 108 predictor
## 2 classes: ' <=50K', ' >50K'
##
## No pre-processing
## Resampling: Cross-Validated (3 fold)
## Summary of sample sizes: 13220, 13219, 13219
## Resampling results:
##
## Accuracy Kappa
## 0.9419035 0.3381238
# Per-fold accuracy and kappa
print(Adult_TDA_PC_5.60.5_n4_LrFit0$resample)
## Accuracy Kappa Resample
## 1 0.9470419 0.4066884 Fold1
## 2 0.9403933 0.3012704 Fold2
## 3 0.9382753 0.3064126 Fold3
# Keep only the per-fold Accuracy column (still a one-column data frame),
# then show the coefficient table of the fitted GLM.
ad_tda_pc_5.60.5_n4_lr_fit_re <-
  Adult_TDA_PC_5.60.5_n4_LrFit0$resample["Accuracy"]
summary(Adult_TDA_PC_5.60.5_n4_LrFit0)
##
## Call:
## NULL
##
## Coefficients: (9 not defined because of singularities)
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -5.305e+12 1.359e+13 -0.390 0.696199
## V1 2.776e-02 3.297e-03 8.420 < 2e-16 ***
## V2.. 5.305e+12 1.359e+13 0.390 0.696199
## V2.Federal.gov 5.305e+12 1.359e+13 0.390 0.696199
## V2.Local.gov 5.305e+12 1.359e+13 0.390 0.696199
## V2.Never.worked 5.305e+12 1.359e+13 0.390 0.696199
## V2.Private 5.305e+12 1.359e+13 0.390 0.696199
## V2.Self.emp.inc 5.305e+12 1.359e+13 0.390 0.696199
## V2.Self.emp.not.inc 5.305e+12 1.359e+13 0.390 0.696199
## V2.State.gov 5.305e+12 1.359e+13 0.390 0.696199
## V2.Without.pay 5.305e+12 1.359e+13 0.390 0.696199
## V3 6.440e-07 3.267e-07 1.971 0.048707 *
## V4.10th -1.270e+00 3.938e-01 -3.225 0.001259 **
## V4.11th -4.071e-01 2.854e-01 -1.427 0.153723
## V4.12th -8.291e-01 4.464e-01 -1.857 0.063261 .
## V4.1st.4th -1.628e+00 1.065e+00 -1.529 0.126354
## V4.5th.6th -1.222e+00 6.613e-01 -1.848 0.064662 .
## V4.7th.8th -1.208e+00 4.595e-01 -2.628 0.008593 **
## V4.9th -4.071e-01 3.750e-01 -1.086 0.277622
## V4.Assoc.acdm 1.706e-01 1.730e-01 0.986 0.323958
## V4.Assoc.voc 2.550e-01 1.642e-01 1.553 0.120395
## V4.Bachelors 5.677e-01 1.074e-01 5.284 1.26e-07 ***
## V4.Doctorate 1.557e+00 2.909e-01 5.351 8.76e-08 ***
## V4.HS.grad -3.858e-01 1.025e-01 -3.766 0.000166 ***
## V4.Masters 6.479e-01 1.577e-01 4.110 3.96e-05 ***
## V4.Preschool -4.190e+01 1.103e+07 0.000 0.999997
## V4.Prof.school 8.962e-01 2.994e-01 2.993 0.002763 **
## V4.Some.college NA NA NA NA
## V5 NA NA NA NA
## V6.Divorced 3.473e-02 1.737e-01 0.200 0.841511
## V6.Married.AF.spouse 2.923e+00 7.134e-01 4.097 4.18e-05 ***
## V6.Married.civ.spouse 1.946e+00 3.627e-01 5.366 8.05e-08 ***
## V6.Married.spouse.absent -1.693e-02 2.975e-01 -0.057 0.954608
## V6.Never.married -2.070e-01 1.897e-01 -1.091 0.275098
## V6.Separated -1.458e-01 2.399e-01 -0.608 0.543274
## V6.Widowed NA NA NA NA
## V7.. NA NA NA NA
## V7.Adm.clerical -3.826e-02 2.230e-01 -0.172 0.863768
## V7.Armed.Forces -2.332e+01 1.260e+05 0.000 0.999852
## V7.Craft.repair 1.128e-01 2.336e-01 0.483 0.629119
## V7.Exec.managerial 4.956e-01 2.222e-01 2.231 0.025686 *
## V7.Farming.fishing -1.435e+00 5.115e-01 -2.806 0.005017 **
## V7.Handlers.cleaners -9.752e-01 3.890e-01 -2.507 0.012176 *
## V7.Machine.op.inspct -9.598e-01 2.969e-01 -3.232 0.001228 **
## V7.Other.service -7.050e-01 2.497e-01 -2.824 0.004747 **
## V7.Priv.house.serv -4.524e+00 2.340e+00 -1.933 0.053264 .
## V7.Prof.specialty 2.273e-01 2.284e-01 0.995 0.319643
## V7.Protective.serv 8.066e-01 3.098e-01 2.604 0.009218 **
## V7.Sales 2.258e-01 2.264e-01 0.998 0.318509
## V7.Tech.support 3.456e-01 2.608e-01 1.325 0.185172
## V7.Transport.moving NA NA NA NA
## V8.Husband -6.587e+00 5.914e-01 -11.139 < 2e-16 ***
## V8.Not.in.family -6.209e-01 3.335e-01 -1.862 0.062662 .
## V8.Other.relative -1.940e+00 3.257e-01 -5.958 2.56e-09 ***
## V8.Own.child -1.766e+00 3.248e-01 -5.437 5.40e-08 ***
## V8.Unmarried -6.939e-01 3.429e-01 -2.024 0.042995 *
## V8.Wife NA NA NA NA
## V9.Amer.Indian.Eskimo 4.993e-02 3.405e-01 0.147 0.883427
## V9.Asian.Pac.Islander 1.831e-01 2.717e-01 0.674 0.500417
## V9.Black -1.246e-01 1.249e-01 -0.997 0.318693
## V9.Other 1.680e-01 4.192e-01 0.401 0.688620
## V9.White NA NA NA NA
## V10.Female -2.862e-01 9.215e-02 -3.106 0.001894 **
## V10.Male NA NA NA NA
## V11 3.687e-04 1.590e-05 23.189 < 2e-16 ***
## V12 5.003e-04 7.319e-05 6.836 8.17e-12 ***
## V13 2.709e-02 3.056e-03 8.866 < 2e-16 ***
## V14.. -2.295e+00 1.292e+00 -1.777 0.075619 .
## V14.Cambodia -3.397e+00 2.440e+00 -1.392 0.163797
## V14.Canada -2.147e+00 1.362e+00 -1.576 0.114969
## V14.China -2.031e+00 1.434e+00 -1.416 0.156711
## V14.Columbia -2.862e+00 1.651e+00 -1.734 0.083007 .
## V14.Cuba -2.645e+00 1.423e+00 -1.859 0.063092 .
## V14.Dominican.Republic -2.946e+00 1.659e+00 -1.776 0.075798 .
## V14.Ecuador -1.269e+00 1.684e+00 -0.754 0.451137
## V14.El.Salvador -2.178e+00 1.514e+00 -1.438 0.150320
## V14.England -2.254e+00 1.378e+00 -1.636 0.101755
## V14.France -1.722e+00 1.480e+00 -1.163 0.244717
## V14.Germany -2.260e+00 1.353e+00 -1.670 0.094911 .
## V14.Greece -1.731e+00 1.700e+00 -1.018 0.308684
## V14.Guatemala -7.303e-01 1.498e+00 -0.487 0.625997
## V14.Haiti -1.757e+00 1.536e+00 -1.144 0.252519
## V14.Holand.Netherlands -2.393e+01 3.511e+05 0.000 0.999946
## V14.Honduras -2.518e+01 1.051e+05 0.000 0.999809
## V14.Hong -2.495e+00 1.750e+00 -1.426 0.154003
## V14.Hungary -2.131e+00 1.700e+00 -1.254 0.209987
## V14.India -1.853e+00 1.417e+00 -1.308 0.190981
## V14.Iran -2.620e+01 8.169e+04 0.000 0.999744
## V14.Ireland -2.059e+00 1.658e+00 -1.242 0.214323
## V14.Italy -8.864e-01 1.388e+00 -0.639 0.523045
## V14.Jamaica -1.427e+00 1.446e+00 -0.987 0.323868
## V14.Japan -9.820e-01 1.391e+00 -0.706 0.480183
## V14.Laos -1.933e+00 1.718e+00 -1.125 0.260601
## V14.Mexico -2.510e+00 1.332e+00 -1.884 0.059546 .
## V14.Nicaragua -1.809e+00 1.724e+00 -1.049 0.293962
## V14.Outlying.US.Guam.USVI.etc. -2.642e+01 9.785e+04 0.000 0.999785
## V14.Peru -2.528e+01 6.768e+04 0.000 0.999702
## V14.Philippines -1.991e+00 1.345e+00 -1.480 0.138777
## V14.Poland -1.990e+00 1.481e+00 -1.344 0.178998
## V14.Portugal -1.501e+00 1.528e+00 -0.982 0.325986
## V14.Puerto.Rico -2.261e+00 1.373e+00 -1.647 0.099634 .
## V14.Scotland -2.751e+00 1.801e+00 -1.527 0.126728
## V14.South -3.475e+00 1.529e+00 -2.273 0.023021 *
## V14.Taiwan -1.683e+00 1.482e+00 -1.136 0.255930
## V14.Thailand -2.815e+00 1.765e+00 -1.595 0.110661
## V14.Trinadad.Tobago -2.597e+01 9.829e+04 0.000 0.999789
## V14.United.States -2.160e+00 1.265e+00 -1.707 0.087761 .
## V14.Vietnam -2.191e+00 1.538e+00 -1.424 0.154365
## V14.Yugoslavia NA NA NA NA
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 9522.7 on 19828 degrees of freedom
## Residual deviance: 6139.2 on 19729 degrees of freedom
## AIC: 6339.2
##
## Number of Fisher Scoring iterations: 25
# Variable-importance plot for the node-4 TDA-assisted LR fit, limited to 50
# features. The feature count is passed by name, and the title typo
# ("Assited") is corrected.
vip(Adult_TDA_PC_5.60.5_n4_LrFit0, num_features = 50) +
  ggtitle("Adult_TDA_PCA_5.60.5_n4_Lr1Fit TDA-Assisted LR")

# Score the test data with the node-4 model fitted on the training data
pred0 <- predict(
  object = Adult_TDA_PC_5.60.5_n4_LrFit0,
  newdata = adult.one_hot_df4Test
)
## Warning in predict.lm(object, newdata, se.fit, scale = 1, type = if (type == :
## prediction from rank-deficient fit; attr(*, "non-estim") has doubtful cases
# Confusion matrix of the predictions against the true test-set labels
ad_tda_pc_5.60.5_n4_lr_cf0 <- confusionMatrix(
  data = pred0,
  reference = as.factor(adult.one_hot_df4Test$adult_df1)
)
print(ad_tda_pc_5.60.5_n4_lr_cf0)
## Confusion Matrix and Statistics
##
## Reference
## Prediction <=50K >50K
## <=50K 7356 2061
## >50K 60 291
##
## Accuracy : 0.7829
## 95% CI : (0.7746, 0.791)
## No Information Rate : 0.7592
## P-Value [Acc > NIR] : 1.727e-08
##
## Kappa : 0.163
##
## Mcnemar's Test P-Value : < 2.2e-16
##
## Sensitivity : 0.9919
## Specificity : 0.1237
## Pos Pred Value : 0.7811
## Neg Pred Value : 0.8291
## Prevalence : 0.7592
## Detection Rate : 0.7531
## Detection Prevalence : 0.9641
## Balanced Accuracy : 0.5578
##
## 'Positive' Class : <=50K
##
# Second echo of the same confusion-matrix object (it is already printed
# right after it is built).
print(ad_tda_pc_5.60.5_n4_lr_cf0)
## Confusion Matrix and Statistics
##
## Reference
## Prediction <=50K >50K
## <=50K 7356 2061
## >50K 60 291
##
## Accuracy : 0.7829
## 95% CI : (0.7746, 0.791)
## No Information Rate : 0.7592
## P-Value [Acc > NIR] : 1.727e-08
##
## Kappa : 0.163
##
## Mcnemar's Test P-Value : < 2.2e-16
##
## Sensitivity : 0.9919
## Specificity : 0.1237
## Pos Pred Value : 0.7811
## Neg Pred Value : 0.8291
## Prevalence : 0.7592
## Detection Rate : 0.7531
## Detection Prevalence : 0.9641
## Balanced Accuracy : 0.5578
##
## 'Positive' Class : <=50K
##
# Overall statistics of the confusion matrix
print(ad_tda_pc_5.60.5_n4_lr_cf0$overall)
## Accuracy Kappa AccuracyLower AccuracyUpper AccuracyNull
## 7.828624e-01 1.629729e-01 7.745512e-01 7.910046e-01 7.592138e-01
## AccuracyPValue McnemarPValue
## 1.727476e-08 0.000000e+00
# Keep the overall test-set accuracy (selected by name), then echo the
# per-class metrics.
ad_tda_pc_5.60.5_n4_lr_cf0_ov_acc <-
  ad_tda_pc_5.60.5_n4_lr_cf0$overall["Accuracy"]
print(ad_tda_pc_5.60.5_n4_lr_cf0$byClass)
## Sensitivity Specificity Pos Pred Value
## 0.9919094 0.1237245 0.7811405
## Neg Pred Value Precision Recall
## 0.8290598 0.7811405 0.9919094
## F1 Prevalence Detection Rate
## 0.8739975 0.7592138 0.7530713
## Detection Prevalence Balanced Accuracy
## 0.9640663 0.5578169
# Precision, Recall and F1 sit at positions 5-7 of byClass; pick them by name.
ad_tda_pc_5.60.5_n4_lr_cf0_pre_rec_f1 <-
  ad_tda_pc_5.60.5_n4_lr_cf0$byClass[c("Precision", "Recall", "F1")]
###### Bayesian comparison of the non-TDA-assisted LR and the TDA-assisted LR classifier
### Fold-wise accuracy differences (non-TDA LR minus TDA-assisted LR)
diff_tda_pca_5.60.5_lr_n4_3_fold <- ad_lr_fit_re - ad_tda_pc_5.60.5_n4_lr_fit_re
print(diff_tda_pca_5.60.5_lr_n4_3_fold)
## Accuracy
## 1 -0.09734462
## 2 -0.09216378
## 3 -0.08291867
## Bayesian tests on the fold-wise differences
# Bayesian sign test; -0.01 and 0.01 are presumably the ROPE bounds
# (the result reports a $probRope component).
bst_tda_pca_5.60.5_lr.n4_3_fold <- BayesianSignTest(
  as.matrix(diff_tda_pca_5.60.5_lr_n4_3_fold),
  -0.01,
  0.01
)
print(bst_tda_pca_5.60.5_lr.n4_3_fold)
## $probLeft
## [1] 0.75
##
## $probRope
## [1] 0.25
##
## $probRight
## [1] 0
# Sign-test odds: probability of "left" divided by probability of "right"
# (Inf in the recorded run because probRight is 0).
bst_tda_pca_5.60.5_lr.n4_3_fold_odds.left <- with(
  bst_tda_pca_5.60.5_lr.n4_3_fold,
  probLeft / probRight
)
print(bst_tda_pca_5.60.5_lr.n4_3_fold_odds.left)
## [1] Inf
# Bayesian signed-rank test on the same differences and bounds
bsr_tda_pca_5.60.5_lr.n4_3_fold <- BayesianSignedRank(
  as.matrix(diff_tda_pca_5.60.5_lr_n4_3_fold),
  -0.01,
  0.01
)
print(bsr_tda_pca_5.60.5_lr.n4_3_fold)
## $winLeft
## [1] 0.9918667
##
## $winRope
## [1] 0.008133333
##
## $winRight
## [1] 0
# Correlated Bayesian t-test on the fold-wise differences; the second argument
# (0.1) is presumably the fold correlation rho, followed by the -0.01 / 0.01 bounds.
# NOTE(review): under the usual rho = 1/k heuristic 0.1 corresponds to 10-fold
# CV; the model above was resampled with 3 folds, so 1/3 may be intended - confirm.
bct_tda_pca_5.60.5_lr.n4_3_fold <- correlatedBayesianTtest(
  as.matrix(diff_tda_pca_5.60.5_lr_n4_3_fold),
  0.1,
  -0.01,
  0.01
)
print(bct_tda_pca_5.60.5_lr.n4_3_fold)
## $left
## [1] 0.9981925
##
## $rope
## [1] 0.0006438001
##
## $right
## [1] 0.001163699
# ROPE plot of the fold-wise differences for the range [-0.01, 0.01]
diff_tda_pca_5.60.5_lr_n4_3_fold |>
  rope(c(-0.01, 0.01)) |>
  plot()

#BayesFactor
#bf_tda_pca_5.60.5_lr.n4_3_fold = ttestBF(x = as.matrix(diff_tda_pca_5.60.5_lr_n4_3_fold))
#bf_tda_pca_5.60.5_lr.n4_3_fold
# One-sample t-test of the fold-wise differences against a mean of zero
t.test(x = as.matrix(diff_tda_pca_5.60.5_lr_n4_3_fold))
##
## One Sample t-test
##
## data: as.matrix(diff_tda_pca_5.60.5_lr_n4_3_fold)
## t = -21.523, df = 2, p-value = 0.002152
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
## -0.1089625 -0.0726555
## sample estimates:
## mean of x
## -0.09080902
### Test-set accuracy difference (non-TDA LR minus TDA-assisted LR)
diff_tda_pca_5.60.5_lr.n4_test <- lr_cf_ov_acc - ad_tda_pc_5.60.5_n4_lr_cf0_ov_acc
print(diff_tda_pca_5.60.5_lr.n4_test)
## Accuracy
## 0.07043407
## Bayesian tests on the test-set difference
# Bayesian sign test; note the input here is a single difference.
bst_tda_pca_5.60.5_lr.n4_test <- BayesianSignTest(
  as.matrix(diff_tda_pca_5.60.5_lr.n4_test),
  -0.01,
  0.01
)
print(bst_tda_pca_5.60.5_lr.n4_test)
## $probLeft
## [1] 0
##
## $probRope
## [1] 0.5
##
## $probRight
## [1] 0.5
# Sign-test odds: probability of "left" divided by probability of "right"
bst_tda_pca_5.60.5_lr.n4_test_odds.left <- with(
  bst_tda_pca_5.60.5_lr.n4_test,
  probLeft / probRight
)
print(bst_tda_pca_5.60.5_lr.n4_test_odds.left)
## [1] 0
# Bayesian signed-rank test on the test-set difference
bsr_tda_pca_5.60.5_lr.n4_test <- BayesianSignedRank(
  as.matrix(diff_tda_pca_5.60.5_lr.n4_test),
  -0.01,
  0.01
)
print(bsr_tda_pca_5.60.5_lr.n4_test)
## $winLeft
## [1] 0
##
## $winRope
## [1] 0.1604667
##
## $winRight
## [1] 0.8395333
# Correlated Bayesian t-test on the single test-set difference.
# NOTE(review): all three probabilities print as NA below, presumably because
# a variance cannot be estimated from one difference - confirm whether this
# test is meaningful here.
bct_tda_pca_5.60.5_lr.n4_test <- correlatedBayesianTtest(
  as.matrix(diff_tda_pca_5.60.5_lr.n4_test),
  0.1,
  -0.01,
  0.01
)
print(bct_tda_pca_5.60.5_lr.n4_test)
## $left
## [1] NA
##
## $rope
## [1] NA
##
## $right
## [1] NA
# Rope Plot
#plot(rope(diff_tda_pca_5.60.5_lr.n4_test))
#BayesFactor
#bf_tda_pca_5.60.5_lr.n4_test = ttestBF(x = as.matrix(diff_tda_pca_5.60.5_lr.n4_test))
#bf_tda_pca_5.60.5_lr.n4_test
#t_test
#t.test(as.matrix(diff_tda_pca_5.60.5_lr.n4_test))
## Node 5
# Fit a binomial GLM (logistic regression) with train() on the node-5 data,
# resampled according to fitControl and scored on accuracy.
# NOTE(review): the recorded run warns that glm.fit did not converge and that
# predictions come from a rank-deficient fit - read the coefficients with care.
Adult_TDA_PC_5.60.5_n5_LrFit0 <- train(
  as.factor(adult_df1) ~ .,
  data = tda.m_adult_5.60.5.n5.vec,
  method = "glm",
  family = "binomial",
  metric = "Accuracy",
  trControl = fitControl
)
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning in predict.lm(object, newdata, se.fit, scale = 1, type = if (type == :
## prediction from rank-deficient fit; attr(*, "non-estim") has doubtful cases
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning in predict.lm(object, newdata, se.fit, scale = 1, type = if (type == :
## prediction from rank-deficient fit; attr(*, "non-estim") has doubtful cases
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning in predict.lm(object, newdata, se.fit, scale = 1, type = if (type == :
## prediction from rank-deficient fit; attr(*, "non-estim") has doubtful cases
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
# Echo the fitted model with its resampling summary
print(Adult_TDA_PC_5.60.5_n5_LrFit0)
## Generalized Linear Model
##
## 16508 samples
## 108 predictor
## 2 classes: ' <=50K', ' >50K'
##
## No pre-processing
## Resampling: Cross-Validated (3 fold)
## Summary of sample sizes: 11006, 11004, 11006
## Resampling results:
##
## Accuracy Kappa
## 0.9792246 0.01033087
# Per-fold accuracy and kappa
print(Adult_TDA_PC_5.60.5_n5_LrFit0$resample)
## Accuracy Kappa Resample
## 1 0.9885496 -0.004986866 Fold1
## 2 0.9591206 0.004610428 Fold2
## 3 0.9900036 0.031369034 Fold3
# Keep only the per-fold Accuracy column (still a one-column data frame),
# then show the coefficient table of the fitted GLM.
ad_tda_pc_5.60.5_n5_lr_fit_re <-
  Adult_TDA_PC_5.60.5_n5_LrFit0$resample["Accuracy"]
summary(Adult_TDA_PC_5.60.5_n5_LrFit0)
##
## Call:
## NULL
##
## Coefficients: (10 not defined because of singularities)
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -2.936e+15 4.126e+07 -71154738 <2e-16 ***
## V1 2.795e+12 5.604e+04 49876062 <2e-16 ***
## V2.. 1.720e+15 2.273e+07 75673437 <2e-16 ***
## V2.Federal.gov 5.023e+15 2.273e+07 220983147 <2e-16 ***
## V2.Local.gov 4.783e+15 2.259e+07 211738434 <2e-16 ***
## V2.Never.worked 1.614e+15 3.402e+07 47436536 <2e-16 ***
## V2.Private 1.368e+15 2.247e+07 60866370 <2e-16 ***
## V2.Self.emp.inc 4.270e+15 2.328e+07 183431419 <2e-16 ***
## V2.Self.emp.not.inc 6.485e+14 2.263e+07 28664230 <2e-16 ***
## V2.State.gov 1.237e+15 2.264e+07 54655129 <2e-16 ***
## V2.Without.pay NA NA NA NA
## V3 -1.310e+08 4.998e+00 -26217733 <2e-16 ***
## V4.10th -1.187e+15 2.964e+06 -400354538 <2e-16 ***
## V4.11th -3.707e+13 2.565e+06 -14450207 <2e-16 ***
## V4.12th -4.253e+11 3.914e+06 -108653 <2e-16 ***
## V4.1st.4th -6.758e+14 7.196e+06 -93908497 <2e-16 ***
## V4.5th.6th 3.554e+14 4.552e+06 78080360 <2e-16 ***
## V4.7th.8th -4.865e+13 4.112e+06 -11829978 <2e-16 ***
## V4.9th 6.394e+13 4.097e+06 15605585 <2e-16 ***
## V4.Assoc.acdm -1.205e+14 3.003e+06 -40127749 <2e-16 ***
## V4.Assoc.voc -1.032e+14 2.837e+06 -36378165 <2e-16 ***
## V4.Bachelors -1.550e+14 1.956e+06 -79203626 <2e-16 ***
## V4.Doctorate 1.783e+15 1.417e+07 125811363 <2e-16 ***
## V4.HS.grad -8.817e+13 1.392e+06 -63365127 <2e-16 ***
## V4.Masters -6.340e+13 3.804e+06 -16667070 <2e-16 ***
## V4.Preschool -2.061e+15 1.117e+07 -184560787 <2e-16 ***
## V4.Prof.school 1.630e+15 1.031e+07 158096449 <2e-16 ***
## V4.Some.college NA NA NA NA
## V5 NA NA NA NA
## V6.Divorced 3.783e+13 2.711e+06 13952512 <2e-16 ***
## V6.Married.AF.spouse 1.308e+15 2.112e+07 61938495 <2e-16 ***
## V6.Married.civ.spouse 1.030e+14 6.505e+06 15831397 <2e-16 ***
## V6.Married.spouse.absent 3.750e+13 4.245e+06 8834211 <2e-16 ***
## V6.Never.married 4.887e+13 2.955e+06 16539569 <2e-16 ***
## V6.Separated -3.250e+13 3.378e+06 -9622182 <2e-16 ***
## V6.Widowed NA NA NA NA
## V7.. NA NA NA NA
## V7.Adm.clerical -3.215e+15 3.256e+06 -987396861 <2e-16 ***
## V7.Armed.Forces -7.024e+15 2.777e+07 -252902842 <2e-16 ***
## V7.Craft.repair -3.175e+15 3.399e+06 -934022857 <2e-16 ***
## V7.Exec.managerial -3.161e+15 3.608e+06 -875992005 <2e-16 ***
## V7.Farming.fishing -5.693e+14 4.612e+06 -123425649 <2e-16 ***
## V7.Handlers.cleaners -3.228e+15 3.641e+06 -886584972 <2e-16 ***
## V7.Machine.op.inspct -3.252e+15 3.561e+06 -913261254 <2e-16 ***
## V7.Other.service -3.178e+15 3.221e+06 -986546973 <2e-16 ***
## V7.Priv.house.serv -3.194e+15 6.425e+06 -497125067 <2e-16 ***
## V7.Prof.specialty -3.047e+15 3.662e+06 -832039959 <2e-16 ***
## V7.Protective.serv -2.911e+15 5.518e+06 -527500316 <2e-16 ***
## V7.Sales -3.108e+15 3.357e+06 -925978357 <2e-16 ***
## V7.Tech.support -3.230e+15 4.265e+06 -757421188 <2e-16 ***
## V7.Transport.moving NA NA NA NA
## V8.Husband -1.902e+14 1.739e+07 -10932429 <2e-16 ***
## V8.Not.in.family 9.589e+13 6.529e+06 14686788 <2e-16 ***
## V8.Other.relative 3.574e+13 6.477e+06 5518054 <2e-16 ***
## V8.Own.child 8.360e+13 6.500e+06 12861365 <2e-16 ***
## V8.Unmarried 8.373e+13 6.612e+06 12663374 <2e-16 ***
## V8.Wife NA NA NA NA
## V9.Amer.Indian.Eskimo 1.125e+14 4.866e+06 23118239 <2e-16 ***
## V9.Asian.Pac.Islander 4.161e+13 4.308e+06 9658458 <2e-16 ***
## V9.Black 6.112e+13 1.597e+06 38268207 <2e-16 ***
## V9.Other 3.183e+14 5.163e+06 61648817 <2e-16 ***
## V9.White NA NA NA NA
## V10.Female 1.187e+14 1.280e+06 92740768 <2e-16 ***
## V10.Male NA NA NA NA
## V11 7.832e+10 3.472e+02 225608643 <2e-16 ***
## V12 -5.422e+10 1.860e+03 -29141741 <2e-16 ***
## V13 1.081e+12 4.891e+04 22096896 <2e-16 ***
## V14.. -5.132e+13 3.384e+07 -1516483 <2e-16 ***
## V14.Cambodia -1.154e+14 4.134e+07 -2790132 <2e-16 ***
## V14.Canada 1.607e+15 3.480e+07 46194406 <2e-16 ***
## V14.China 2.410e+15 3.617e+07 66642546 <2e-16 ***
## V14.Columbia 3.040e+15 3.520e+07 86358347 <2e-16 ***
## V14.Cuba -1.292e+14 3.500e+07 -3692668 <2e-16 ***
## V14.Dominican.Republic 3.231e+15 3.483e+07 92762519 <2e-16 ***
## V14.Ecuador -2.158e+14 3.762e+07 -5735603 <2e-16 ***
## V14.El.Salvador -2.142e+14 3.452e+07 -6205917 <2e-16 ***
## V14.England 3.269e+14 3.523e+07 9279923 <2e-16 ***
## V14.France 2.065e+15 3.977e+07 51937536 <2e-16 ***
## V14.Germany 2.223e+15 3.457e+07 64321883 <2e-16 ***
## V14.Greece -1.385e+14 4.115e+07 -3365698 <2e-16 ***
## V14.Guatemala 2.086e+15 3.499e+07 59615589 <2e-16 ***
## V14.Haiti 2.355e+15 3.546e+07 66407367 <2e-16 ***
## V14.Holand.Netherlands 1.036e+14 7.523e+07 1376528 <2e-16 ***
## V14.Honduras 1.695e+15 3.882e+07 43659366 <2e-16 ***
## V14.Hong 2.794e+15 3.989e+07 70036978 <2e-16 ***
## V14.Hungary 6.959e+14 4.213e+07 16517752 <2e-16 ***
## V14.India 7.609e+14 3.620e+07 21020539 <2e-16 ***
## V14.Iran -1.041e+15 3.974e+07 -26209066 <2e-16 ***
## V14.Ireland 6.450e+14 3.756e+07 17170262 <2e-16 ***
## V14.Italy 2.388e+15 3.667e+07 65124246 <2e-16 ***
## V14.Jamaica 3.887e+14 3.471e+07 11197872 <2e-16 ***
## V14.Japan 4.693e+14 3.597e+07 13048901 <2e-16 ***
## V14.Laos 6.471e+14 3.947e+07 16394801 <2e-16 ***
## V14.Mexico 4.010e+14 3.381e+07 11860351 <2e-16 ***
## V14.Nicaragua 3.787e+15 3.654e+07 103631629 <2e-16 ***
## V14.Outlying.US.Guam.USVI.etc. 3.143e+15 3.879e+07 81014532 <2e-16 ***
## V14.Peru 3.152e+15 3.652e+07 86308999 <2e-16 ***
## V14.Philippines 3.966e+15 3.419e+07 115989282 <2e-16 ***
## V14.Poland -8.437e+14 3.577e+07 -23586483 <2e-16 ***
## V14.Portugal 4.216e+14 3.716e+07 11346808 <2e-16 ***
## V14.Puerto.Rico 2.402e+15 3.448e+07 69667946 <2e-16 ***
## V14.Scotland 2.255e+15 4.336e+07 52008946 <2e-16 ***
## V14.South 2.206e+15 3.552e+07 62124248 <2e-16 ***
## V14.Taiwan 1.321e+15 3.707e+07 35642271 <2e-16 ***
## V14.Thailand 2.586e+15 3.900e+07 66305671 <2e-16 ***
## V14.Trinadad.Tobago 3.073e+15 3.883e+07 79134290 <2e-16 ***
## V14.United.States 2.717e+14 3.360e+07 8086328 <2e-16 ***
## V14.Vietnam 3.196e+15 3.527e+07 90615666 <2e-16 ***
## V14.Yugoslavia NA NA NA NA
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 1518.4 on 16507 degrees of freedom
## Residual deviance: 25374.7 on 16409 degrees of freedom
## AIC: 25573
##
## Number of Fisher Scoring iterations: 25
# Variable-importance plot for the node-5 TDA-assisted LR fit, limited to 50
# features. The feature count is passed by name, and the title typo
# ("Assited") is corrected.
vip(Adult_TDA_PC_5.60.5_n5_LrFit0, num_features = 50) +
  ggtitle("Adult_TDA_PCA_5.60.5_n5_Lr1Fit TDA-Assisted LR")

# Score the test data with the node-5 model fitted on the training data
pred0 <- predict(
  object = Adult_TDA_PC_5.60.5_n5_LrFit0,
  newdata = adult.one_hot_df4Test
)
## Warning in predict.lm(object, newdata, se.fit, scale = 1, type = if (type == :
## prediction from rank-deficient fit; attr(*, "non-estim") has doubtful cases
# Confusion matrix of the predictions against the true test-set labels
ad_tda_pc_5.60.5_n5_lr_cf0 <- confusionMatrix(
  data = pred0,
  reference = as.factor(adult.one_hot_df4Test$adult_df1)
)
print(ad_tda_pc_5.60.5_n5_lr_cf0)
## Confusion Matrix and Statistics
##
## Reference
## Prediction <=50K >50K
## <=50K 7290 2190
## >50K 126 162
##
## Accuracy : 0.7629
## 95% CI : (0.7543, 0.7713)
## No Information Rate : 0.7592
## P-Value [Acc > NIR] : 0.2006
##
## Kappa : 0.0741
##
## Mcnemar's Test P-Value : <2e-16
##
## Sensitivity : 0.98301
## Specificity : 0.06888
## Pos Pred Value : 0.76899
## Neg Pred Value : 0.56250
## Prevalence : 0.75921
## Detection Rate : 0.74631
## Detection Prevalence : 0.97052
## Balanced Accuracy : 0.52594
##
## 'Positive' Class : <=50K
##
# Second echo of the same confusion-matrix object (it is already printed
# right after it is built).
print(ad_tda_pc_5.60.5_n5_lr_cf0)
## Confusion Matrix and Statistics
##
## Reference
## Prediction <=50K >50K
## <=50K 7290 2190
## >50K 126 162
##
## Accuracy : 0.7629
## 95% CI : (0.7543, 0.7713)
## No Information Rate : 0.7592
## P-Value [Acc > NIR] : 0.2006
##
## Kappa : 0.0741
##
## Mcnemar's Test P-Value : <2e-16
##
## Sensitivity : 0.98301
## Specificity : 0.06888
## Pos Pred Value : 0.76899
## Neg Pred Value : 0.56250
## Prevalence : 0.75921
## Detection Rate : 0.74631
## Detection Prevalence : 0.97052
## Balanced Accuracy : 0.52594
##
## 'Positive' Class : <=50K
##
# Overall statistics of the confusion matrix
print(ad_tda_pc_5.60.5_n5_lr_cf0$overall)
## Accuracy Kappa AccuracyLower AccuracyUpper AccuracyNull
## 0.76289926 0.07408412 0.75433618 0.77130532 0.75921376
## AccuracyPValue McnemarPValue
## 0.20059653 0.00000000
# Keep the overall test-set accuracy (selected by name), then echo the
# per-class metrics.
ad_tda_pc_5.60.5_n5_lr_cf0_ov_acc <-
  ad_tda_pc_5.60.5_n5_lr_cf0$overall["Accuracy"]
print(ad_tda_pc_5.60.5_n5_lr_cf0$byClass)
## Sensitivity Specificity Pos Pred Value
## 0.98300971 0.06887755 0.76898734
## Neg Pred Value Precision Recall
## 0.56250000 0.76898734 0.98300971
## F1 Prevalence Detection Rate
## 0.86292614 0.75921376 0.74631450
## Detection Prevalence Balanced Accuracy
## 0.97051597 0.52594363
# Precision, Recall and F1 sit at positions 5-7 of byClass; pick them by name.
ad_tda_pc_5.60.5_n5_lr_cf0_pre_rec_f1 <-
  ad_tda_pc_5.60.5_n5_lr_cf0$byClass[c("Precision", "Recall", "F1")]
###### Bayesian comparison of the non-TDA-assisted LR and the TDA-assisted LR classifier
### Fold-wise accuracy differences (non-TDA LR minus TDA-assisted LR)
diff_tda_pca_5.60.5_lr_n5_3_fold <- ad_lr_fit_re - ad_tda_pc_5.60.5_n5_lr_fit_re
print(diff_tda_pca_5.60.5_lr_n5_3_fold)
## Accuracy
## 1 -0.1388523
## 2 -0.1108911
## 3 -0.1346470
## Bayesian tests on the fold-wise differences
# Bayesian sign test; -0.01 and 0.01 are presumably the ROPE bounds
# (the result reports a $probRope component).
bst_tda_pca_5.60.5_lr.n5_3_fold <- BayesianSignTest(
  as.matrix(diff_tda_pca_5.60.5_lr_n5_3_fold),
  -0.01,
  0.01
)
print(bst_tda_pca_5.60.5_lr.n5_3_fold)
## $probLeft
## [1] 0.75
##
## $probRope
## [1] 0.25
##
## $probRight
## [1] 0
# Sign-test odds: probability of "left" divided by probability of "right"
# (Inf in the recorded run because probRight is 0).
bst_tda_pca_5.60.5_lr.n5_3_fold_odds.left <- with(
  bst_tda_pca_5.60.5_lr.n5_3_fold,
  probLeft / probRight
)
print(bst_tda_pca_5.60.5_lr.n5_3_fold_odds.left)
## [1] Inf
# Bayesian signed-rank test on the same differences and bounds
bsr_tda_pca_5.60.5_lr.n5_3_fold <- BayesianSignedRank(
  as.matrix(diff_tda_pca_5.60.5_lr_n5_3_fold),
  -0.01,
  0.01
)
print(bsr_tda_pca_5.60.5_lr.n5_3_fold)
## $winLeft
## [1] 0.9913667
##
## $winRope
## [1] 0.008633333
##
## $winRight
## [1] 0
# Correlated Bayesian t-test on the fold-wise differences; the second argument
# (0.1) is presumably the fold correlation rho, followed by the -0.01 / 0.01 bounds.
# NOTE(review): under the usual rho = 1/k heuristic 0.1 corresponds to 10-fold
# CV; the model above was resampled with 3 folds, so 1/3 may be intended - confirm.
bct_tda_pca_5.60.5_lr.n5_3_fold <- correlatedBayesianTtest(
  as.matrix(diff_tda_pca_5.60.5_lr_n5_3_fold),
  0.1,
  -0.01,
  0.01
)
print(bct_tda_pca_5.60.5_lr.n5_3_fold)
## $left
## [1] 0.996419
##
## $rope
## [1] 0.0009543443
##
## $right
## [1] 0.002626614
# ROPE plot of the fold-wise differences for the range [-0.01, 0.01]
diff_tda_pca_5.60.5_lr_n5_3_fold |>
  rope(c(-0.01, 0.01)) |>
  plot()

#BayesFactor
#bf_tda_pca_5.60.5_lr.n5_3_fold = ttestBF(x = as.matrix(diff_tda_pca_5.60.5_lr_n5_3_fold))
#bf_tda_pca_5.60.5_lr.n5_3_fold
# One-sample t-test of the fold-wise differences against a mean of zero
t.test(x = as.matrix(diff_tda_pca_5.60.5_lr_n5_3_fold))
##
## One Sample t-test
##
## data: as.matrix(diff_tda_pca_5.60.5_lr_n5_3_fold)
## t = -14.72, df = 2, p-value = 0.004584
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
## -0.16558296 -0.09067728
## sample estimates:
## mean of x
## -0.1281301
### Test-set accuracy difference (non-TDA LR minus TDA-assisted LR)
diff_tda_pca_5.60.5_lr.n5_test <- lr_cf_ov_acc - ad_tda_pc_5.60.5_n5_lr_cf0_ov_acc
print(diff_tda_pca_5.60.5_lr.n5_test)
## Accuracy
## 0.09039722
## Bayesian tests on the test-set difference
# Bayesian sign test; note the input here is a single difference.
bst_tda_pca_5.60.5_lr.n5_test <- BayesianSignTest(
  as.matrix(diff_tda_pca_5.60.5_lr.n5_test),
  -0.01,
  0.01
)
print(bst_tda_pca_5.60.5_lr.n5_test)
## $probLeft
## [1] 0
##
## $probRope
## [1] 0.5
##
## $probRight
## [1] 0.5
# Sign-test odds: probability of "left" divided by probability of "right"
bst_tda_pca_5.60.5_lr.n5_test_odds.left <- with(
  bst_tda_pca_5.60.5_lr.n5_test,
  probLeft / probRight
)
print(bst_tda_pca_5.60.5_lr.n5_test_odds.left)
## [1] 0
# Bayesian signed-rank test on the test-set difference
bsr_tda_pca_5.60.5_lr.n5_test <- BayesianSignedRank(
  as.matrix(diff_tda_pca_5.60.5_lr.n5_test),
  -0.01,
  0.01
)
print(bsr_tda_pca_5.60.5_lr.n5_test)
## $winLeft
## [1] 0
##
## $winRope
## [1] 0.1588
##
## $winRight
## [1] 0.8412
# Correlated Bayesian t-test on the single test-set difference.
# NOTE(review): all three probabilities print as NA below, presumably because
# a variance cannot be estimated from one difference - confirm whether this
# test is meaningful here.
bct_tda_pca_5.60.5_lr.n5_test <- correlatedBayesianTtest(
  as.matrix(diff_tda_pca_5.60.5_lr.n5_test),
  0.1,
  -0.01,
  0.01
)
print(bct_tda_pca_5.60.5_lr.n5_test)
## $left
## [1] NA
##
## $rope
## [1] NA
##
## $right
## [1] NA
# Rope Plot
#plot(rope(diff_tda_pca_5.60.5_lr.n5_test))
#BayesFactor
#bf_tda_pca_5.60.5_lr.n5_test = ttestBF(x = as.matrix(diff_tda_pca_5.60.5_lr.n5_test))
#bf_tda_pca_5.60.5_lr.n5_test
#t_test
#t.test(as.matrix(diff_tda_pca_5.60.5_lr.n5_test))
##With TDA KDE filter 5 intervals, 60% overlap, 5 bins
##Node1
# Fit a binomial GLM (logistic regression) through train() on the node-1
# data set, resampled as configured in fitControl and scored on Accuracy.
# NOTE(review): the warnings recorded below (non-convergence, fitted
# probabilities of 0 or 1, rank-deficient predictions) indicate a numerically
# unstable fit; read the coefficient estimates with caution.
Adult_TDA_KDE_5.60.5_n1_LrFit0 <- train(
  as.factor(adult_df1) ~ .,
  data = tda.m_kde_adult_5.60.5.n1.vec,
  method = "glm",
  family = "binomial",
  metric = "Accuracy",
  trControl = fitControl
)
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning in predict.lm(object, newdata, se.fit, scale = 1, type = if (type == :
## prediction from rank-deficient fit; attr(*, "non-estim") has doubtful cases
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning in predict.lm(object, newdata, se.fit, scale = 1, type = if (type == :
## prediction from rank-deficient fit; attr(*, "non-estim") has doubtful cases
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning in predict.lm(object, newdata, se.fit, scale = 1, type = if (type == :
## prediction from rank-deficient fit; attr(*, "non-estim") has doubtful cases
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
Adult_TDA_KDE_5.60.5_n1_LrFit0
## Generalized Linear Model
##
## 15260 samples
## 108 predictor
## 2 classes: ' <=50K', ' >50K'
##
## No pre-processing
## Resampling: Cross-Validated (3 fold)
## Summary of sample sizes: 10173, 10174, 10173
## Resampling results:
##
## Accuracy Kappa
## 0.8581258 0.615439
# Per-fold resampling results of the node-1 model
Adult_TDA_KDE_5.60.5_n1_LrFit0$resample
## Accuracy Kappa Resample
## 1 0.8594456 0.6184364 Fold1
## 2 0.8576484 0.6172245 Fold2
## 3 0.8572833 0.6106561 Fold3
# Keep only the per-fold Accuracy column (the first column, as a one-column
# data frame) for the fold-wise comparisons further down.
ad_tda_kde_5.60.5_n1_lr_fit_re <-
  Adult_TDA_KDE_5.60.5_n1_LrFit0$resample["Accuracy"]
summary(Adult_TDA_KDE_5.60.5_n1_LrFit0)
##
## Call:
## NULL
##
## Coefficients: (9 not defined because of singularities)
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -6.142e+12 1.239e+13 -0.496 0.620110
## V1 1.518e-02 2.306e-03 6.585 4.56e-11 ***
## V2.. 6.142e+12 1.239e+13 0.496 0.620110
## V2.Federal.gov 6.142e+12 1.239e+13 0.496 0.620110
## V2.Local.gov 6.142e+12 1.239e+13 0.496 0.620110
## V2.Never.worked 6.142e+12 1.239e+13 0.496 0.620110
## V2.Private 6.142e+12 1.239e+13 0.496 0.620110
## V2.Self.emp.inc 6.142e+12 1.239e+13 0.496 0.620110
## V2.Self.emp.not.inc 6.142e+12 1.239e+13 0.496 0.620110
## V2.State.gov 6.142e+12 1.239e+13 0.496 0.620110
## V2.Without.pay 6.142e+12 1.239e+13 0.496 0.620110
## V3 7.344e-07 2.017e-07 3.641 0.000271 ***
## V4.10th -1.128e+00 1.674e-01 -6.742 1.56e-11 ***
## V4.11th -1.072e+00 1.671e-01 -6.413 1.42e-10 ***
## V4.12th -7.837e-01 3.087e-01 -2.538 0.011135 *
## V4.1st.4th -1.700e+00 4.850e-01 -3.504 0.000457 ***
## V4.5th.6th -1.455e+00 3.062e-01 -4.752 2.01e-06 ***
## V4.7th.8th -1.572e+00 1.929e-01 -8.149 3.67e-16 ***
## V4.9th -1.287e+00 2.270e-01 -5.669 1.44e-08 ***
## V4.Assoc.acdm 1.192e-01 1.470e-01 0.811 0.417351
## V4.Assoc.voc 1.628e-01 1.456e-01 1.118 0.263642
## V4.Bachelors 6.651e-01 9.143e-02 7.275 3.48e-13 ***
## V4.Doctorate 1.899e+00 1.719e-01 11.049 < 2e-16 ***
## V4.HS.grad -4.291e-01 9.057e-02 -4.738 2.16e-06 ***
## V4.Masters 1.127e+00 1.104e-01 10.205 < 2e-16 ***
## V4.Preschool -3.330e+01 9.685e+04 0.000 0.999726
## V4.Prof.school 1.666e+00 1.529e-01 10.901 < 2e-16 ***
## V4.Some.college NA NA NA NA
## V5 NA NA NA NA
## V6.Divorced -1.933e-01 1.782e-01 -1.085 0.278056
## V6.Married.AF.spouse 2.466e+00 8.528e-01 2.892 0.003829 **
## V6.Married.civ.spouse 2.036e+00 4.216e-01 4.829 1.37e-06 ***
## V6.Married.spouse.absent -2.048e-01 3.325e-01 -0.616 0.537886
## V6.Never.married -8.052e-01 1.894e-01 -4.252 2.12e-05 ***
## V6.Separated -3.555e-01 2.656e-01 -1.339 0.180721
## V6.Widowed NA NA NA NA
## V7.. NA NA NA NA
## V7.Adm.clerical -1.669e-01 1.540e-01 -1.084 0.278455
## V7.Armed.Forces -2.270e-01 2.296e+00 -0.099 0.921251
## V7.Craft.repair 1.588e-02 1.316e-01 0.121 0.903974
## V7.Exec.managerial 7.165e-01 1.324e-01 5.413 6.19e-08 ***
## V7.Farming.fishing -1.108e+00 2.005e-01 -5.526 3.27e-08 ***
## V7.Handlers.cleaners -7.512e-01 2.356e-01 -3.188 0.001431 **
## V7.Machine.op.inspct -5.187e-01 1.766e-01 -2.937 0.003312 **
## V7.Other.service -1.083e+00 1.962e-01 -5.520 3.39e-08 ***
## V7.Priv.house.serv -3.848e+00 1.747e+00 -2.203 0.027629 *
## V7.Prof.specialty 3.384e-01 1.390e-01 2.434 0.014923 *
## V7.Protective.serv 2.530e-01 2.085e-01 1.214 0.224793
## V7.Sales 1.023e-01 1.385e-01 0.739 0.459977
## V7.Tech.support 4.957e-01 1.875e-01 2.643 0.008205 **
## V7.Transport.moving NA NA NA NA
## V8.Husband -1.266e+00 1.450e-01 -8.731 < 2e-16 ***
## V8.Not.in.family -5.156e-01 4.071e-01 -1.266 0.205375
## V8.Other.relative -1.278e+00 3.662e-01 -3.491 0.000482 ***
## V8.Own.child -1.723e+00 4.221e-01 -4.083 4.45e-05 ***
## V8.Unmarried -5.268e-01 4.189e-01 -1.258 0.208517
## V8.Wife NA NA NA NA
## V9.Amer.Indian.Eskimo -1.668e-01 2.913e-01 -0.573 0.566815
## V9.Asian.Pac.Islander -1.754e-02 2.423e-01 -0.072 0.942299
## V9.Black -1.080e-01 1.103e-01 -0.979 0.327734
## V9.Other 2.535e-01 3.757e-01 0.675 0.499837
## V9.White NA NA NA NA
## V10.Female -8.974e-01 1.066e-01 -8.419 < 2e-16 ***
## V10.Male NA NA NA NA
## V11 3.052e-04 1.414e-05 21.586 < 2e-16 ***
## V12 6.961e-04 5.292e-05 13.155 < 2e-16 ***
## V13 3.004e-02 2.218e-03 13.540 < 2e-16 ***
## V14.. -1.449e+00 1.137e+00 -1.275 0.202411
## V14.Cambodia -3.782e-02 1.433e+00 -0.026 0.978950
## V14.Canada -1.058e+00 1.172e+00 -0.903 0.366680
## V14.China -1.976e+00 1.227e+00 -1.611 0.107143
## V14.Columbia -2.998e+00 1.413e+00 -2.121 0.033931 *
## V14.Cuba -1.552e+00 1.196e+00 -1.298 0.194354
## V14.Dominican.Republic -2.470e+01 3.907e+04 -0.001 0.999496
## V14.Ecuador -2.819e+00 1.910e+00 -1.476 0.140023
## V14.El.Salvador -1.865e+00 1.325e+00 -1.408 0.159130
## V14.England -9.328e-01 1.186e+00 -0.786 0.431585
## V14.France -9.459e-01 1.274e+00 -0.742 0.457911
## V14.Germany -7.957e-01 1.193e+00 -0.667 0.504693
## V14.Greece -1.988e+00 1.363e+00 -1.458 0.144829
## V14.Guatemala -1.478e+00 1.500e+00 -0.985 0.324396
## V14.Haiti -2.243e+00 1.874e+00 -1.197 0.231472
## V14.Holand.Netherlands -2.367e+01 3.223e+05 0.000 0.999941
## V14.Honduras -2.111e+00 2.839e+00 -0.744 0.457073
## V14.Hong -6.762e-01 1.385e+00 -0.488 0.625273
## V14.Hungary -3.321e-01 1.473e+00 -0.225 0.821696
## V14.India -1.865e+00 1.197e+00 -1.559 0.119004
## V14.Iran -2.005e+00 1.390e+00 -1.442 0.149222
## V14.Ireland -1.381e+00 1.688e+00 -0.818 0.413343
## V14.Italy 3.534e-01 1.184e+00 0.299 0.765237
## V14.Jamaica -2.563e+00 1.532e+00 -1.673 0.094333 .
## V14.Japan -4.642e-01 1.286e+00 -0.361 0.718023
## V14.Laos -7.318e-01 1.516e+00 -0.483 0.629310
## V14.Mexico -1.762e+00 1.147e+00 -1.537 0.124377
## V14.Nicaragua -1.886e+00 1.394e+00 -1.353 0.176200
## V14.Outlying.US.Guam.USVI.etc. -2.592e+01 1.244e+05 0.000 0.999834
## V14.Peru -1.528e+00 1.583e+00 -0.965 0.334443
## V14.Philippines -1.053e+00 1.180e+00 -0.892 0.372499
## V14.Poland -1.188e+00 1.286e+00 -0.924 0.355727
## V14.Portugal -2.408e+00 1.686e+00 -1.429 0.153119
## V14.Puerto.Rico -6.206e-01 1.236e+00 -0.502 0.615675
## V14.Scotland -4.862e-01 1.548e+00 -0.314 0.753535
## V14.South -3.051e+00 1.306e+00 -2.336 0.019482 *
## V14.Taiwan -7.284e-01 1.290e+00 -0.565 0.572285
## V14.Thailand -1.994e+00 1.784e+00 -1.118 0.263509
## V14.Trinadad.Tobago -1.246e+00 1.675e+00 -0.744 0.456827
## V14.United.States -1.074e+00 1.120e+00 -0.959 0.337576
## V14.Vietnam -1.899e+00 1.415e+00 -1.343 0.179355
## V14.Yugoslavia NA NA NA NA
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 17488.4 on 15259 degrees of freedom
## Residual deviance: 9509.3 on 15160 degrees of freedom
## AIC: 9709.3
##
## Number of Fisher Scoring iterations: 25
# Variable-importance plot of the node-1 model, limited to 50 features
# (plot title spelling corrected: "Assisted").
vip(Adult_TDA_KDE_5.60.5_n1_LrFit0, num_features = 50) +
  ggtitle("Adult_TDA_KDE_5.60.5_n1_Lr1Fit TDA-Assisted LR")

# Predict outcome using Adult_TDA_KDE_5.60.5_n1_LrFit0 from training data based on testing data
pred0 <- predict(
  Adult_TDA_KDE_5.60.5_n1_LrFit0,
  newdata = adult.one_hot_df4Test
)
## Warning in predict.lm(object, newdata, se.fit, scale = 1, type = if (type == :
## prediction from rank-deficient fit; attr(*, "non-estim") has doubtful cases
# Create confusion matrix to assess model fit/performance on test data:
# predicted classes (data) against the observed adult_df1 labels (reference).
ad_tda_kde_5.60.5_n1_lr_cf0 <- confusionMatrix(
  data = pred0,
  reference = as.factor(adult.one_hot_df4Test$adult_df1)
)
ad_tda_kde_5.60.5_n1_lr_cf0
## Confusion Matrix and Statistics
##
## Reference
## Prediction <=50K >50K
## <=50K 6970 1001
## >50K 446 1351
##
## Accuracy : 0.8519
## 95% CI : (0.8447, 0.8589)
## No Information Rate : 0.7592
## P-Value [Acc > NIR] : < 2.2e-16
##
## Kappa : 0.5593
##
## Mcnemar's Test P-Value : < 2.2e-16
##
## Sensitivity : 0.9399
## Specificity : 0.5744
## Pos Pred Value : 0.8744
## Neg Pred Value : 0.7518
## Prevalence : 0.7592
## Detection Rate : 0.7136
## Detection Prevalence : 0.8160
## Balanced Accuracy : 0.7571
##
## 'Positive' Class : <=50K
##
# Prints the same confusion-matrix object a second time; the output below is
# identical to the one directly above.
ad_tda_kde_5.60.5_n1_lr_cf0
## Confusion Matrix and Statistics
##
## Reference
## Prediction <=50K >50K
## <=50K 6970 1001
## >50K 446 1351
##
## Accuracy : 0.8519
## 95% CI : (0.8447, 0.8589)
## No Information Rate : 0.7592
## P-Value [Acc > NIR] : < 2.2e-16
##
## Kappa : 0.5593
##
## Mcnemar's Test P-Value : < 2.2e-16
##
## Sensitivity : 0.9399
## Specificity : 0.5744
## Pos Pred Value : 0.8744
## Neg Pred Value : 0.7518
## Prevalence : 0.7592
## Detection Rate : 0.7136
## Detection Prevalence : 0.8160
## Balanced Accuracy : 0.7571
##
## 'Positive' Class : <=50K
##
# Overall test-set statistics of the node-1 model
ad_tda_kde_5.60.5_n1_lr_cf0$overall
## Accuracy Kappa AccuracyLower AccuracyUpper AccuracyNull
## 8.518632e-01 5.593270e-01 8.446625e-01 8.588534e-01 7.592138e-01
## AccuracyPValue McnemarPValue
## 3.746356e-113 4.771065e-48
# Store the test-set accuracy (first element of $overall)
ad_tda_kde_5.60.5_n1_lr_cf0_ov_acc <-
  ad_tda_kde_5.60.5_n1_lr_cf0$overall["Accuracy"]
# Per-class statistics
ad_tda_kde_5.60.5_n1_lr_cf0$byClass
## Sensitivity Specificity Pos Pred Value
## 0.9398598 0.5744048 0.8744198
## Neg Pred Value Precision Recall
## 0.7518086 0.8744198 0.9398598
## F1 Prevalence Detection Rate
## 0.9059596 0.7592138 0.7135545
## Detection Prevalence Balanced Accuracy
## 0.8160319 0.7571323
# Store precision, recall and F1 (elements 5 to 7 of $byClass)
ad_tda_kde_5.60.5_n1_lr_cf0_pre_rec_f1 <-
  ad_tda_kde_5.60.5_n1_lr_cf0$byClass[c("Precision", "Recall", "F1")]
###### Conduct initial Bayesian tests of non-tda-assisted LR vs. tda-assisted LR classifiers
### 3-fold diff
# Fold-wise accuracy difference: ad_lr_fit_re minus the node-1 TDA-KDE model.
# NOTE(review): ad_lr_fit_re is presumably the per-fold accuracy of the
# non-TDA LR model -- it is defined outside this section.
diff_tda_kde_5.60.5_lr_n1_3_fold <-
  ad_lr_fit_re - ad_tda_kde_5.60.5_n1_lr_fit_re
diff_tda_kde_5.60.5_lr_n1_3_fold
## Accuracy
## 1 -0.009748357
## 2 -0.009418882
## 3 -0.001926598
## Bayesian Tests 3-fold diff
# Bayesian Sign Test (last two arguments: -0.01 and 0.01)
bst_tda_kde_5.60.5_lr.n1_3_fold <- BayesianSignTest(
  as.matrix(diff_tda_kde_5.60.5_lr_n1_3_fold),
  -0.01, 0.01
)
bst_tda_kde_5.60.5_lr.n1_3_fold
## $probLeft
## [1] 0
##
## $probRope
## [1] 1
##
## $probRight
## [1] 0
# Odds Left Bayesian Sign Test: ratio probLeft / probRight.
# Both probabilities are 0 here, so the ratio is 0/0 = NaN.
bst_tda_kde_5.60.5_lr.n1_3_fold_odds.left <- with(
  bst_tda_kde_5.60.5_lr.n1_3_fold,
  probLeft / probRight
)
bst_tda_kde_5.60.5_lr.n1_3_fold_odds.left
## [1] NaN
# Bayesian Signed Rank Test
bsr_tda_kde_5.60.5_lr.n1_3_fold <- BayesianSignedRank(
  as.matrix(diff_tda_kde_5.60.5_lr_n1_3_fold),
  -0.01, 0.01
)
bsr_tda_kde_5.60.5_lr.n1_3_fold
## $winLeft
## [1] 0
##
## $winRope
## [1] 1
##
## $winRight
## [1] 0
# Bayesian Correlated Test
# NOTE(review): 0.1 is presumably the correlation parameter -- confirm
# against the definition of correlatedBayesianTtest().
bct_tda_kde_5.60.5_lr.n1_3_fold <- correlatedBayesianTtest(
  as.matrix(diff_tda_kde_5.60.5_lr_n1_3_fold),
  0.1, -0.01, 0.01
)
bct_tda_kde_5.60.5_lr.n1_3_fold
## $left
## [1] 0.2100575
##
## $rope
## [1] 0.7755917
##
## $right
## [1] 0.01435083
## $left
## [1] 0.2100575
##
## $rope
## [1] 0.7755917
##
## $right
## [1] 0.01435083
# ROPE plot of the 3-fold accuracy differences over the interval [-0.01, 0.01]
plot(
  rope(diff_tda_kde_5.60.5_lr_n1_3_fold, c(-0.01, 0.01))
)

# Bayes factor (currently disabled)
#bf_tda_kde_5.60.5_lr.n1_3_fold = ttestBF(x = as.matrix(diff_tda_kde_5.60.5_lr_n1_3_fold))
#bf_tda_kde_5.60.5_lr.n1_3_fold
# One-sample t-test of the fold differences (null hypothesis: mean equals 0)
t.test(
  as.matrix(diff_tda_kde_5.60.5_lr_n1_3_fold)
)
##
## One Sample t-test
##
## data: as.matrix(diff_tda_kde_5.60.5_lr_n1_3_fold)
## t = -2.7529, df = 2, p-value = 0.1105
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
## -0.018020736 0.003958178
## sample estimates:
## mean of x
## -0.007031279
### Test set diff
# Difference of the two stored test-set accuracies (a single value):
# lr_cf_ov_acc minus the node-1 TDA-KDE model's accuracy.
diff_tda_kde_5.60.5_lr.n1_test <-
  lr_cf_ov_acc - ad_tda_kde_5.60.5_n1_lr_cf0_ov_acc
diff_tda_kde_5.60.5_lr.n1_test
## Accuracy
## 0.001433251
## Bayesian Tests Test set diff
# Bayesian Sign Test (last two arguments: -0.01 and 0.01)
bst_tda_kde_5.60.5_lr.n1_test <- BayesianSignTest(
  as.matrix(diff_tda_kde_5.60.5_lr.n1_test),
  -0.01, 0.01
)
bst_tda_kde_5.60.5_lr.n1_test
## $probLeft
## [1] 0
##
## $probRope
## [1] 1
##
## $probRight
## [1] 0
# Odds Left Bayesian Sign Test: ratio probLeft / probRight.
# Both probabilities are 0 here, so the ratio is 0/0 = NaN.
bst_tda_kde_5.60.5_lr.n1_test_odds.left <- with(
  bst_tda_kde_5.60.5_lr.n1_test,
  probLeft / probRight
)
bst_tda_kde_5.60.5_lr.n1_test_odds.left
## [1] NaN
# Bayesian Signed Rank Test
bsr_tda_kde_5.60.5_lr.n1_test <- BayesianSignedRank(
  as.matrix(diff_tda_kde_5.60.5_lr.n1_test),
  -0.01, 0.01
)
bsr_tda_kde_5.60.5_lr.n1_test
## $winLeft
## [1] 0
##
## $winRope
## [1] 1
##
## $winRight
## [1] 0
# Bayesian Correlated Test
# The input here is a single difference; the recorded output is NA for all
# three probabilities.
bct_tda_kde_5.60.5_lr.n1_test <- correlatedBayesianTtest(
  as.matrix(diff_tda_kde_5.60.5_lr.n1_test),
  0.1, -0.01, 0.01
)
bct_tda_kde_5.60.5_lr.n1_test
## $left
## [1] NA
##
## $rope
## [1] NA
##
## $right
## [1] NA
## $left
## [1] NA
##
## $rope
## [1] NA
##
## $right
## [1] NA
# Rope Plot
#plot(rope(diff_tda_kde_5.60.5_lr.n1_test)))
#BayesFactor
#bf_tda_kde_5.60.5_lr.n1_test = ttestBF(x = as.matrix(diff_tda_kde_5.60.5_lr.n1_test)) #bf_tda_pca_5.60.5_lr.n1_test
#t_test
#t.test(as.matrix(diff_tda_kde_5.60.5_lr.n1_test))
##With TDA KDE filter 5 intervals, 60% overlap, 5 bins
##Node2
# Fit a binomial GLM (logistic regression) through train() on the node-2
# data set, resampled as configured in fitControl and scored on Accuracy.
# NOTE(review): the warnings recorded below (non-convergence, fitted
# probabilities of 0 or 1, rank-deficient predictions) indicate a numerically
# unstable fit; read the coefficient estimates with caution.
Adult_TDA_KDE_5.60.5_n2_LrFit0 <- train(
  as.factor(adult_df1) ~ .,
  data = tda.m_kde_adult_5.60.5.n2.vec,
  method = "glm",
  family = "binomial",
  metric = "Accuracy",
  trControl = fitControl
)
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning in predict.lm(object, newdata, se.fit, scale = 1, type = if (type == :
## prediction from rank-deficient fit; attr(*, "non-estim") has doubtful cases
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning in predict.lm(object, newdata, se.fit, scale = 1, type = if (type == :
## prediction from rank-deficient fit; attr(*, "non-estim") has doubtful cases
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning in predict.lm(object, newdata, se.fit, scale = 1, type = if (type == :
## prediction from rank-deficient fit; attr(*, "non-estim") has doubtful cases
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
Adult_TDA_KDE_5.60.5_n2_LrFit0
## Generalized Linear Model
##
## 14482 samples
## 108 predictor
## 2 classes: ' <=50K', ' >50K'
##
## No pre-processing
## Resampling: Cross-Validated (3 fold)
## Summary of sample sizes: 9654, 9655, 9655
## Resampling results:
##
## Accuracy Kappa
## 0.837868 0.5866575
# Per-fold resampling results of the node-2 model
Adult_TDA_KDE_5.60.5_n2_LrFit0$resample
## Accuracy Kappa Resample
## 1 0.8340928 0.5799871 Fold1
## 2 0.8402735 0.5877605 Fold2
## 3 0.8392376 0.5922249 Fold3
# Keep only the per-fold Accuracy column (the first column, as a one-column
# data frame) for the fold-wise comparisons further down.
ad_tda_kde_5.60.5_n2_lr_fit_re <-
  Adult_TDA_KDE_5.60.5_n2_LrFit0$resample["Accuracy"]
summary(Adult_TDA_KDE_5.60.5_n2_LrFit0)
##
## Call:
## NULL
##
## Coefficients: (15 not defined because of singularities)
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -1.309e+13 9.862e+12 -1.327 0.184466
## V1 4.049e-02 2.794e-03 14.493 < 2e-16 ***
## V2.. 1.309e+13 9.862e+12 1.327 0.184466
## V2.Federal.gov 1.309e+13 9.862e+12 1.327 0.184466
## V2.Local.gov 1.309e+13 9.862e+12 1.327 0.184466
## V2.Never.worked -4.491e+15 9.862e+12 -455.349 < 2e-16 ***
## V2.Private 1.309e+13 9.862e+12 1.327 0.184466
## V2.Self.emp.inc 1.309e+13 9.862e+12 1.327 0.184466
## V2.Self.emp.not.inc 1.309e+13 9.862e+12 1.327 0.184466
## V2.State.gov 1.309e+13 9.862e+12 1.327 0.184466
## V2.Without.pay 1.309e+13 9.862e+12 1.327 0.184466
## V3 1.108e-06 2.829e-07 3.917 8.96e-05 ***
## V4.10th -1.102e+00 4.439e-01 -2.482 0.013066 *
## V4.11th -8.364e-01 1.988e-01 -4.208 2.58e-05 ***
## V4.12th -7.124e-01 2.927e-01 -2.434 0.014943 *
## V4.1st.4th NA NA NA NA
## V4.5th.6th NA NA NA NA
## V4.7th.8th NA NA NA NA
## V4.9th NA NA NA NA
## V4.Assoc.acdm 2.085e-01 1.279e-01 1.630 0.103122
## V4.Assoc.voc 1.955e-01 1.314e-01 1.489 0.136600
## V4.Bachelors 9.590e-01 8.476e-02 11.314 < 2e-16 ***
## V4.Doctorate NA NA NA NA
## V4.HS.grad -4.015e-01 8.285e-02 -4.845 1.26e-06 ***
## V4.Masters 1.363e+00 1.086e-01 12.547 < 2e-16 ***
## V4.Preschool NA NA NA NA
## V4.Prof.school 2.391e+00 2.008e-01 11.908 < 2e-16 ***
## V4.Some.college NA NA NA NA
## V5 NA NA NA NA
## V6.Divorced -1.674e-02 2.200e-01 -0.076 0.939333
## V6.Married.AF.spouse 2.853e+00 8.913e-01 3.201 0.001369 **
## V6.Married.civ.spouse 1.809e+00 4.933e-01 3.668 0.000245 ***
## V6.Married.spouse.absent 1.727e-02 3.627e-01 0.048 0.962025
## V6.Never.married -5.343e-01 2.302e-01 -2.321 0.020284 *
## V6.Separated -2.855e-01 3.026e-01 -0.943 0.345537
## V6.Widowed NA NA NA NA
## V7.. NA NA NA NA
## V7.Adm.clerical -1.043e-02 1.524e-01 -0.068 0.945425
## V7.Armed.Forces -2.422e+01 2.622e+05 0.000 0.999926
## V7.Craft.repair 1.091e-01 1.350e-01 0.808 0.419225
## V7.Exec.managerial 8.528e-01 1.345e-01 6.338 2.32e-10 ***
## V7.Farming.fishing -7.283e-01 2.070e-01 -3.518 0.000435 ***
## V7.Handlers.cleaners -5.735e-01 2.418e-01 -2.371 0.017723 *
## V7.Machine.op.inspct -3.798e-01 1.762e-01 -2.155 0.031131 *
## V7.Other.service -9.053e-01 1.939e-01 -4.670 3.01e-06 ***
## V7.Priv.house.serv -3.426e+00 2.418e+00 -1.417 0.156563
## V7.Prof.specialty 4.991e-01 1.408e-01 3.544 0.000394 ***
## V7.Protective.serv 6.908e-01 2.001e-01 3.452 0.000556 ***
## V7.Sales 3.437e-01 1.392e-01 2.469 0.013534 *
## V7.Tech.support 5.862e-01 1.782e-01 3.290 0.001001 **
## V7.Transport.moving NA NA NA NA
## V8.Husband -1.444e+00 1.402e-01 -10.303 < 2e-16 ***
## V8.Not.in.family -1.176e+00 4.607e-01 -2.553 0.010681 *
## V8.Other.relative -1.698e+00 3.971e-01 -4.277 1.89e-05 ***
## V8.Own.child -2.339e+00 4.729e-01 -4.946 7.57e-07 ***
## V8.Unmarried -1.266e+00 4.708e-01 -2.690 0.007150 **
## V8.Wife NA NA NA NA
## V9.Amer.Indian.Eskimo -8.523e-02 3.050e-01 -0.279 0.779895
## V9.Asian.Pac.Islander -3.720e-02 2.165e-01 -0.172 0.863567
## V9.Black -1.473e-02 1.112e-01 -0.132 0.894606
## V9.Other 4.444e-02 3.804e-01 0.117 0.907010
## V9.White NA NA NA NA
## V10.Female -8.839e-01 1.061e-01 -8.327 < 2e-16 ***
## V10.Male NA NA NA NA
## V11 3.255e-04 1.496e-05 21.755 < 2e-16 ***
## V12 7.599e-04 5.598e-05 13.575 < 2e-16 ***
## V13 2.552e-02 2.465e-03 10.353 < 2e-16 ***
## V14.. -4.037e-01 8.580e-01 -0.470 0.638033
## V14.Cambodia 1.943e+00 1.262e+00 1.540 0.123630
## V14.Canada -1.803e-02 9.283e-01 -0.019 0.984507
## V14.China -4.556e-01 1.005e+00 -0.453 0.650328
## V14.Columbia -2.457e+01 5.482e+04 0.000 0.999642
## V14.Cuba 3.746e-01 9.599e-01 0.390 0.696328
## V14.Dominican.Republic -2.402e+01 6.168e+04 0.000 0.999689
## V14.Ecuador -1.364e+00 1.496e+00 -0.912 0.361966
## V14.El.Salvador -1.258e-01 1.058e+00 -0.119 0.905377
## V14.England 6.551e-01 9.257e-01 0.708 0.479102
## V14.France 8.094e-01 1.158e+00 0.699 0.484717
## V14.Germany 7.270e-01 9.023e-01 0.806 0.420428
## V14.Greece -1.297e+00 1.148e+00 -1.130 0.258643
## V14.Guatemala -6.867e-01 2.179e+00 -0.315 0.752665
## V14.Haiti -3.621e-01 1.177e+00 -0.308 0.758395
## V14.Holand.Netherlands -2.246e+01 3.276e+05 0.000 0.999945
## V14.Honduras -2.252e+01 1.135e+05 0.000 0.999842
## V14.Hong 1.802e+00 1.315e+00 1.370 0.170542
## V14.Hungary 6.030e-01 1.429e+00 0.422 0.673095
## V14.India -9.643e-02 9.128e-01 -0.106 0.915862
## V14.Iran 1.989e-01 1.127e+00 0.176 0.859909
## V14.Ireland -2.388e+01 1.224e+05 0.000 0.999844
## V14.Italy 1.144e+00 9.684e-01 1.181 0.237472
## V14.Jamaica -1.196e+00 1.151e+00 -1.039 0.298678
## V14.Japan 6.682e-01 1.011e+00 0.661 0.508751
## V14.Laos -2.443e+01 1.052e+05 0.000 0.999815
## V14.Mexico -5.701e-01 9.323e-01 -0.611 0.540877
## V14.Nicaragua -4.672e-01 1.388e+00 -0.337 0.736460
## V14.Outlying.US.Guam.USVI.etc. -2.408e+01 1.845e+05 0.000 0.999896
## V14.Peru -4.700e-02 1.286e+00 -0.037 0.970837
## V14.Philippines 6.400e-01 9.024e-01 0.709 0.478163
## V14.Poland 6.504e-01 1.012e+00 0.642 0.520630
## V14.Portugal 8.490e-01 1.435e+00 0.592 0.554169
## V14.Puerto.Rico -7.499e-01 1.061e+00 -0.707 0.479752
## V14.Scotland 1.339e-01 1.370e+00 0.098 0.922146
## V14.South -8.075e-01 1.004e+00 -0.804 0.421444
## V14.Taiwan 8.773e-02 1.025e+00 0.086 0.931796
## V14.Thailand -8.249e-01 1.465e+00 -0.563 0.573362
## V14.Trinadad.Tobago -1.844e-01 1.740e+00 -0.106 0.915585
## V14.United.States 2.699e-01 8.346e-01 0.323 0.746386
## V14.Vietnam -1.080e+00 1.220e+00 -0.885 0.376068
## V14.Yugoslavia NA NA NA NA
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 17221.4 on 14481 degrees of freedom
## Residual deviance: 9708.1 on 14388 degrees of freedom
## AIC: 9896.1
##
## Number of Fisher Scoring iterations: 25
# Variable-importance plot of the node-2 model, limited to 50 features
# (plot title spelling corrected: "Assisted").
vip(Adult_TDA_KDE_5.60.5_n2_LrFit0, num_features = 50) +
  ggtitle("Adult_TDA_KDE_5.60.5_n2_Lr1Fit TDA-Assisted LR")

# Predict outcome using Adult_TDA_KDE_5.60.5_n2_LrFit0 from training data based on testing data
pred0 <- predict(
  Adult_TDA_KDE_5.60.5_n2_LrFit0,
  newdata = adult.one_hot_df4Test
)
## Warning in predict.lm(object, newdata, se.fit, scale = 1, type = if (type == :
## prediction from rank-deficient fit; attr(*, "non-estim") has doubtful cases
# Create confusion matrix to assess model fit/performance on test data:
# predicted classes (data) against the observed adult_df1 labels (reference).
ad_tda_kde_5.60.5_n2_lr_cf0 <- confusionMatrix(
  data = pred0,
  reference = as.factor(adult.one_hot_df4Test$adult_df1)
)
ad_tda_kde_5.60.5_n2_lr_cf0
## Confusion Matrix and Statistics
##
## Reference
## Prediction <=50K >50K
## <=50K 6902 992
## >50K 514 1360
##
## Accuracy : 0.8458
## 95% CI : (0.8385, 0.8529)
## No Information Rate : 0.7592
## P-Value [Acc > NIR] : < 2.2e-16
##
## Kappa : 0.5469
##
## Mcnemar's Test P-Value : < 2.2e-16
##
## Sensitivity : 0.9307
## Specificity : 0.5782
## Pos Pred Value : 0.8743
## Neg Pred Value : 0.7257
## Prevalence : 0.7592
## Detection Rate : 0.7066
## Detection Prevalence : 0.8081
## Balanced Accuracy : 0.7545
##
## 'Positive' Class : <=50K
##
# Prints the same confusion-matrix object a second time; the output below is
# identical to the one directly above.
ad_tda_kde_5.60.5_n2_lr_cf0
## Confusion Matrix and Statistics
##
## Reference
## Prediction <=50K >50K
## <=50K 6902 992
## >50K 514 1360
##
## Accuracy : 0.8458
## 95% CI : (0.8385, 0.8529)
## No Information Rate : 0.7592
## P-Value [Acc > NIR] : < 2.2e-16
##
## Kappa : 0.5469
##
## Mcnemar's Test P-Value : < 2.2e-16
##
## Sensitivity : 0.9307
## Specificity : 0.5782
## Pos Pred Value : 0.8743
## Neg Pred Value : 0.7257
## Prevalence : 0.7592
## Detection Rate : 0.7066
## Detection Prevalence : 0.8081
## Balanced Accuracy : 0.7545
##
## 'Positive' Class : <=50K
##
# Overall test-set statistics of the node-2 model
ad_tda_kde_5.60.5_n2_lr_cf0$overall
## Accuracy Kappa AccuracyLower AccuracyUpper AccuracyNull
## 8.458231e-01 5.468678e-01 8.385076e-01 8.529317e-01 7.592138e-01
## AccuracyPValue McnemarPValue
## 2.418652e-98 1.005859e-34
# Store the test-set accuracy (first element of $overall)
ad_tda_kde_5.60.5_n2_lr_cf0_ov_acc <-
  ad_tda_kde_5.60.5_n2_lr_cf0$overall["Accuracy"]
# Per-class statistics
ad_tda_kde_5.60.5_n2_lr_cf0$byClass
## Sensitivity Specificity Pos Pred Value
## 0.9306904 0.5782313 0.8743349
## Neg Pred Value Precision Recall
## 0.7257204 0.8743349 0.9306904
## F1 Prevalence Detection Rate
## 0.9016329 0.7592138 0.7065930
## Detection Prevalence Balanced Accuracy
## 0.8081491 0.7544608
# Store precision, recall and F1 (elements 5 to 7 of $byClass)
ad_tda_kde_5.60.5_n2_lr_cf0_pre_rec_f1 <-
  ad_tda_kde_5.60.5_n2_lr_cf0$byClass[c("Precision", "Recall", "F1")]
###### Conduct initial Bayesian tests of non-tda-assisted LR vs. tda-assisted LR classifiers
### 3-fold diff
# Fold-wise accuracy difference: ad_lr_fit_re minus the node-2 TDA-KDE model.
# NOTE(review): ad_lr_fit_re is presumably the per-fold accuracy of the
# non-TDA LR model -- it is defined outside this section.
diff_tda_kde_5.60.5_lr_n2_3_fold <-
  ad_lr_fit_re - ad_tda_kde_5.60.5_n2_lr_fit_re
diff_tda_kde_5.60.5_lr_n2_3_fold
## Accuracy
## 1 0.015604497
## 2 0.007956103
## 3 0.016119051
## Bayesian Tests 3-fold diff
# Bayesian Sign Test (last two arguments: -0.01 and 0.01)
bst_tda_kde_5.60.5_lr.n2_3_fold <- BayesianSignTest(
  as.matrix(diff_tda_kde_5.60.5_lr_n2_3_fold),
  -0.01, 0.01
)
bst_tda_kde_5.60.5_lr.n2_3_fold
## $probLeft
## [1] 0
##
## $probRope
## [1] 0.5
##
## $probRight
## [1] 0.5
# Odds Left Bayesian Sign Test: ratio probLeft / probRight
bst_tda_kde_5.60.5_lr.n2_3_fold_odds.left <- with(
  bst_tda_kde_5.60.5_lr.n2_3_fold,
  probLeft / probRight
)
bst_tda_kde_5.60.5_lr.n2_3_fold_odds.left
## [1] 0
# Bayesian Signed Rank Test
bsr_tda_kde_5.60.5_lr.n2_3_fold <- BayesianSignedRank(
  as.matrix(diff_tda_kde_5.60.5_lr_n2_3_fold),
  -0.01, 0.01
)
bsr_tda_kde_5.60.5_lr.n2_3_fold
## $winLeft
## [1] 0
##
## $winRope
## [1] 0.3069333
##
## $winRight
## [1] 0.6930667
# Bayesian Correlated Test
# NOTE(review): 0.1 is presumably the correlation parameter -- confirm
# against the definition of correlatedBayesianTtest().
bct_tda_kde_5.60.5_lr.n2_3_fold <- correlatedBayesianTtest(
  as.matrix(diff_tda_kde_5.60.5_lr_n2_3_fold),
  0.1, -0.01, 0.01
)
bct_tda_kde_5.60.5_lr.n2_3_fold
## $left
## [1] 0.008392836
##
## $rope
## [1] 0.1919669
##
## $right
## [1] 0.7996403
## $left
## [1] 0.008392836
##
## $rope
## [1] 0.1919669
##
## $right
## [1] 0.7996403
# ROPE plot of the 3-fold accuracy differences over the interval [-0.01, 0.01]
plot(
  rope(diff_tda_kde_5.60.5_lr_n2_3_fold, c(-0.01, 0.01))
)

# Bayes factor (currently disabled)
#bf_tda_kde_5.60.5_lr.n2_3_fold = ttestBF(x = as.matrix(diff_tda_kde_5.60.5_lr_n2_3_fold))
#bf_tda_kde_5.60.5_lr.n2_3_fold
# One-sample t-test of the fold differences (null hypothesis: mean equals 0)
t.test(
  as.matrix(diff_tda_kde_5.60.5_lr_n2_3_fold)
)
##
## One Sample t-test
##
## data: as.matrix(diff_tda_kde_5.60.5_lr_n2_3_fold)
## t = 5.0112, df = 2, p-value = 0.03759
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
## 0.001870099 0.024583001
## sample estimates:
## mean of x
## 0.01322655
### Test set diff
# Difference of the two stored test-set accuracies (a single value):
# lr_cf_ov_acc minus the node-2 TDA-KDE model's accuracy.
diff_tda_kde_5.60.5_lr.n2_test <-
  lr_cf_ov_acc - ad_tda_kde_5.60.5_n2_lr_cf0_ov_acc
diff_tda_kde_5.60.5_lr.n2_test
## Accuracy
## 0.007473382
## Bayesian Tests Test set diff
# Bayesian Sign Test (last two arguments: -0.01 and 0.01)
bst_tda_kde_5.60.5_lr.n2_test <- BayesianSignTest(
  as.matrix(diff_tda_kde_5.60.5_lr.n2_test),
  -0.01, 0.01
)
bst_tda_kde_5.60.5_lr.n2_test
## $probLeft
## [1] 0
##
## $probRope
## [1] 1
##
## $probRight
## [1] 0
# Odds Left Bayesian Sign Test: ratio probLeft / probRight.
# Both probabilities are 0 here, so the ratio is 0/0 = NaN.
bst_tda_kde_5.60.5_lr.n2_test_odds.left <- with(
  bst_tda_kde_5.60.5_lr.n2_test,
  probLeft / probRight
)
bst_tda_kde_5.60.5_lr.n2_test_odds.left
## [1] NaN
# Bayesian Signed Rank Test
bsr_tda_kde_5.60.5_lr.n2_test <- BayesianSignedRank(
  as.matrix(diff_tda_kde_5.60.5_lr.n2_test),
  -0.01, 0.01
)
bsr_tda_kde_5.60.5_lr.n2_test
## $winLeft
## [1] 0
##
## $winRope
## [1] 1
##
## $winRight
## [1] 0
# Bayesian Correlated Test
# The input here is a single difference; the recorded output is NA for all
# three probabilities.
bct_tda_kde_5.60.5_lr.n2_test <- correlatedBayesianTtest(
  as.matrix(diff_tda_kde_5.60.5_lr.n2_test),
  0.1, -0.01, 0.01
)
bct_tda_kde_5.60.5_lr.n2_test
## $left
## [1] NA
##
## $rope
## [1] NA
##
## $right
## [1] NA
## $left
## [1] NA
##
## $rope
## [1] NA
##
## $right
## [1] NA
# Rope Plot
#plot(rope(diff_tda_kde_5.60.5_lr.n2_test)))
#BayesFactor
#bf_tda_kde_5.60.5_lr.n2_test = ttestBF(x = as.matrix(diff_tda_kde_5.60.5_lr.n2_test)) #bf_tda_pca_5.60.5_lr.n2_test
#t_test
#t.test(as.matrix(diff_tda_kde_5.60.5_lr.n2_test))
## Node3
# Logistic regression (caret method "glm", binomial family) fitted on
# tda.m_kde_adult_5.60.5.n3.vec with the resampling scheme in fitControl,
# scored on Accuracy. The recorded run emits glm.fit non-convergence and
# rank-deficient-prediction warnings for this fit.
Adult_TDA_KDE_5.60.5_n3_LrFit0 <- train(
  as.factor(adult_df1) ~ .,
  data = tda.m_kde_adult_5.60.5.n3.vec,
  method = "glm",
  family = "binomial",
  metric = "Accuracy",
  trControl = fitControl
)
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning in predict.lm(object, newdata, se.fit, scale = 1, type = if (type == :
## prediction from rank-deficient fit; attr(*, "non-estim") has doubtful cases
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning in predict.lm(object, newdata, se.fit, scale = 1, type = if (type == :
## prediction from rank-deficient fit; attr(*, "non-estim") has doubtful cases
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning in predict.lm(object, newdata, se.fit, scale = 1, type = if (type == :
## prediction from rank-deficient fit; attr(*, "non-estim") has doubtful cases
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
Adult_TDA_KDE_5.60.5_n3_LrFit0
## Generalized Linear Model
##
## 13266 samples
## 108 predictor
## 2 classes: ' <=50K', ' >50K'
##
## No pre-processing
## Resampling: Cross-Validated (3 fold)
## Summary of sample sizes: 8844, 8844, 8844
## Resampling results:
##
## Accuracy Kappa
## 0.8340118 0.5665226
Adult_TDA_KDE_5.60.5_n3_LrFit0$resample
## Accuracy Kappa Resample
## 1 0.8335595 0.5693901 Fold1
## 2 0.8367255 0.5687698 Fold2
## 3 0.8317503 0.5614079 Fold3
# Per-fold CV accuracies of the node-3 fit (first column of caret's $resample
# table). This must read the n3 model: the earlier version read
# Adult_TDA_KDE_5.60.5_n2_LrFit0 here, so every node-3 3-fold comparison
# silently reused the node-2 folds.
# NOTE: the node-3 3-fold outputs recorded in this document are identical to
# the node-2 ones for that reason and need to be regenerated.
ad_tda_kde_5.60.5_n3_lr_fit_re <- Adult_TDA_KDE_5.60.5_n3_LrFit0$resample[1]
summary(Adult_TDA_KDE_5.60.5_n3_LrFit0)
##
## Call:
## NULL
##
## Coefficients: (17 not defined because of singularities)
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -3.534e+12 5.012e+12 -0.705 0.480792
## V1 5.465e-02 3.383e-03 16.154 < 2e-16 ***
## V2.. 3.534e+12 5.012e+12 0.705 0.480792
## V2.Federal.gov 3.534e+12 5.012e+12 0.705 0.480792
## V2.Local.gov 3.534e+12 5.012e+12 0.705 0.480792
## V2.Never.worked -4.500e+15 5.012e+12 -897.844 < 2e-16 ***
## V2.Private 3.534e+12 5.012e+12 0.705 0.480792
## V2.Self.emp.inc 3.534e+12 5.012e+12 0.705 0.480792
## V2.Self.emp.not.inc 3.534e+12 5.012e+12 0.705 0.480792
## V2.State.gov 3.534e+12 5.012e+12 0.705 0.480792
## V2.Without.pay 3.534e+12 5.012e+12 0.705 0.480792
## V3 1.200e-06 3.901e-07 3.077 0.002088 **
## V4.10th NA NA NA NA
## V4.11th -8.826e-01 7.509e-01 -1.175 0.239880
## V4.12th -4.441e-01 3.219e-01 -1.379 0.167752
## V4.1st.4th NA NA NA NA
## V4.5th.6th NA NA NA NA
## V4.7th.8th NA NA NA NA
## V4.9th NA NA NA NA
## V4.Assoc.acdm 4.222e-01 1.314e-01 3.213 0.001313 **
## V4.Assoc.voc 3.291e-01 1.237e-01 2.661 0.007789 **
## V4.Bachelors 1.100e+00 8.627e-02 12.749 < 2e-16 ***
## V4.Doctorate NA NA NA NA
## V4.HS.grad -2.741e-01 8.024e-02 -3.417 0.000634 ***
## V4.Masters 1.455e+00 1.263e-01 11.527 < 2e-16 ***
## V4.Preschool NA NA NA NA
## V4.Prof.school NA NA NA NA
## V4.Some.college NA NA NA NA
## V5 NA NA NA NA
## V6.Divorced -6.112e-02 2.879e-01 -0.212 0.831902
## V6.Married.AF.spouse 3.036e+00 9.005e-01 3.371 0.000748 ***
## V6.Married.civ.spouse 2.160e+00 5.014e-01 4.309 1.64e-05 ***
## V6.Married.spouse.absent 6.195e-02 4.133e-01 0.150 0.880844
## V6.Never.married -4.394e-01 2.979e-01 -1.475 0.140283
## V6.Separated -2.142e-01 3.630e-01 -0.590 0.555187
## V6.Widowed NA NA NA NA
## V7.. NA NA NA NA
## V7.Adm.clerical 2.647e-01 1.526e-01 1.735 0.082706 .
## V7.Armed.Forces -2.407e+01 1.761e+05 0.000 0.999891
## V7.Craft.repair 2.148e-01 1.353e-01 1.588 0.112367
## V7.Exec.managerial 1.024e+00 1.372e-01 7.467 8.21e-14 ***
## V7.Farming.fishing -8.214e-01 2.291e-01 -3.585 0.000337 ***
## V7.Handlers.cleaners -5.135e-01 2.399e-01 -2.140 0.032333 *
## V7.Machine.op.inspct -4.451e-02 1.664e-01 -0.267 0.789102
## V7.Other.service -6.582e-01 1.975e-01 -3.333 0.000858 ***
## V7.Priv.house.serv -3.444e+00 2.177e+00 -1.582 0.113700
## V7.Prof.specialty 6.800e-01 1.452e-01 4.684 2.81e-06 ***
## V7.Protective.serv 9.109e-01 1.994e-01 4.568 4.93e-06 ***
## V7.Sales 5.680e-01 1.410e-01 4.029 5.61e-05 ***
## V7.Tech.support 7.595e-01 1.784e-01 4.258 2.06e-05 ***
## V7.Transport.moving NA NA NA NA
## V8.Husband -1.475e+00 1.509e-01 -9.770 < 2e-16 ***
## V8.Not.in.family -9.954e-01 4.341e-01 -2.293 0.021837 *
## V8.Other.relative -2.005e+00 3.790e-01 -5.289 1.23e-07 ***
## V8.Own.child -2.293e+00 4.219e-01 -5.434 5.50e-08 ***
## V8.Unmarried -1.206e+00 4.459e-01 -2.705 0.006829 **
## V8.Wife NA NA NA NA
## V9.Amer.Indian.Eskimo -5.655e-01 3.482e-01 -1.624 0.104354
## V9.Asian.Pac.Islander 4.351e-02 2.155e-01 0.202 0.840000
## V9.Black -2.071e-01 1.167e-01 -1.775 0.075929 .
## V9.Other -4.616e-01 4.228e-01 -1.092 0.274958
## V9.White NA NA NA NA
## V10.Female -8.109e-01 1.188e-01 -6.827 8.69e-12 ***
## V10.Male NA NA NA NA
## V11 3.183e-04 1.567e-05 20.313 < 2e-16 ***
## V12 6.529e-04 5.952e-05 10.968 < 2e-16 ***
## V13 2.541e-02 2.712e-03 9.368 < 2e-16 ***
## V14.. -5.368e-01 8.526e-01 -0.630 0.528925
## V14.Cambodia 1.519e+00 1.192e+00 1.275 0.202433
## V14.Canada 2.187e-01 9.252e-01 0.236 0.813088
## V14.China -9.917e-01 1.050e+00 -0.944 0.345091
## V14.Columbia -2.494e+01 5.303e+04 0.000 0.999625
## V14.Cuba 1.293e+00 9.650e-01 1.340 0.180400
## V14.Dominican.Republic -1.024e+00 1.383e+00 -0.740 0.459151
## V14.Ecuador -1.294e+00 1.510e+00 -0.857 0.391678
## V14.El.Salvador -5.022e-01 1.118e+00 -0.449 0.653278
## V14.England 1.950e-01 9.425e-01 0.207 0.836124
## V14.France 1.378e+00 1.148e+00 1.200 0.230131
## V14.Germany 1.993e-01 9.055e-01 0.220 0.825787
## V14.Greece -1.982e+00 1.190e+00 -1.666 0.095775 .
## V14.Guatemala -2.422e+01 9.614e+04 0.000 0.999799
## V14.Haiti -2.290e-01 1.130e+00 -0.203 0.839407
## V14.Holand.Netherlands -2.221e+01 3.422e+05 0.000 0.999948
## V14.Honduras -2.323e+01 1.503e+05 0.000 0.999877
## V14.Hong 1.286e+00 2.679e+00 0.480 0.631290
## V14.Hungary -9.409e-01 1.445e+00 -0.651 0.514860
## V14.India -5.565e-01 9.309e-01 -0.598 0.549953
## V14.Iran 1.029e-01 9.853e-01 0.104 0.916841
## V14.Ireland 9.944e-01 1.227e+00 0.811 0.417632
## V14.Italy -2.572e-01 9.760e-01 -0.263 0.792174
## V14.Jamaica -2.657e-01 1.089e+00 -0.244 0.807172
## V14.Japan 2.288e-01 1.031e+00 0.222 0.824273
## V14.Laos -2.443e+01 1.495e+05 0.000 0.999870
## V14.Mexico -1.067e+00 9.894e-01 -1.078 0.280923
## V14.Nicaragua -2.388e+01 1.133e+05 0.000 0.999832
## V14.Outlying.US.Guam.USVI.etc. -2.340e+01 1.615e+05 0.000 0.999884
## V14.Peru -1.338e+00 1.536e+00 -0.871 0.383728
## V14.Philippines 8.530e-01 9.043e-01 0.943 0.345533
## V14.Poland -2.748e-01 1.008e+00 -0.273 0.785081
## V14.Portugal 3.493e-01 1.252e+00 0.279 0.780250
## V14.Puerto.Rico -1.476e+00 1.052e+00 -1.403 0.160758
## V14.Scotland -7.664e-01 1.523e+00 -0.503 0.614822
## V14.South -1.207e+00 1.033e+00 -1.169 0.242444
## V14.Taiwan -5.648e-01 1.099e+00 -0.514 0.607329
## V14.Thailand -1.059e+00 1.561e+00 -0.678 0.497672
## V14.Trinadad.Tobago -4.901e-01 1.660e+00 -0.295 0.767748
## V14.United.States 1.300e-02 8.294e-01 0.016 0.987493
## V14.Vietnam -2.663e+00 1.393e+00 -1.911 0.055981 .
## V14.Yugoslavia NA NA NA NA
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 15481.3 on 13265 degrees of freedom
## Residual deviance: 8855.9 on 13174 degrees of freedom
## AIC: 9039.9
##
## Number of Fisher Scoring iterations: 25
# Variable-importance plot for the node-3 LR fit (up to 50 features)
vip(Adult_TDA_KDE_5.60.5_n3_LrFit0, num_features = 50) +
  ggtitle("Adult_TDA_KDE_5.60.5_n3_Lr1Fit TDA-Assited LR")

# Class predictions of the node-3 LR fit on the held-out test set.
# pred0 is a scratch name that is reassigned for every node's model.
pred0 <- predict(
  Adult_TDA_KDE_5.60.5_n3_LrFit0,
  newdata = adult.one_hot_df4Test
)
## Warning in predict.lm(object, newdata, se.fit, scale = 1, type = if (type == :
## prediction from rank-deficient fit; attr(*, "non-estim") has doubtful cases
# Confusion matrix of the node-3 predictions against the true test labels
ad_tda_kde_5.60.5_n3_lr_cf0 <- confusionMatrix(
  data = pred0,
  reference = as.factor(adult.one_hot_df4Test$adult_df1)
)
ad_tda_kde_5.60.5_n3_lr_cf0
## Confusion Matrix and Statistics
##
## Reference
## Prediction <=50K >50K
## <=50K 6763 927
## >50K 653 1425
##
## Accuracy : 0.8382
## 95% CI : (0.8308, 0.8455)
## No Information Rate : 0.7592
## P-Value [Acc > NIR] : < 2.2e-16
##
## Kappa : 0.5393
##
## Mcnemar's Test P-Value : 6.508e-12
##
## Sensitivity : 0.9119
## Specificity : 0.6059
## Pos Pred Value : 0.8795
## Neg Pred Value : 0.6858
## Prevalence : 0.7592
## Detection Rate : 0.6924
## Detection Prevalence : 0.7873
## Balanced Accuracy : 0.7589
##
## 'Positive' Class : <=50K
##
ad_tda_kde_5.60.5_n3_lr_cf0
## Confusion Matrix and Statistics
##
## Reference
## Prediction <=50K >50K
## <=50K 6763 927
## >50K 653 1425
##
## Accuracy : 0.8382
## 95% CI : (0.8308, 0.8455)
## No Information Rate : 0.7592
## P-Value [Acc > NIR] : < 2.2e-16
##
## Kappa : 0.5393
##
## Mcnemar's Test P-Value : 6.508e-12
##
## Sensitivity : 0.9119
## Specificity : 0.6059
## Pos Pred Value : 0.8795
## Neg Pred Value : 0.6858
## Prevalence : 0.7592
## Detection Rate : 0.6924
## Detection Prevalence : 0.7873
## Balanced Accuracy : 0.7589
##
## 'Positive' Class : <=50K
##
ad_tda_kde_5.60.5_n3_lr_cf0$overall
## Accuracy Kappa AccuracyLower AccuracyUpper AccuracyNull
## 8.382473e-01 5.392635e-01 8.307933e-01 8.454991e-01 7.592138e-01
## AccuracyPValue McnemarPValue
## 1.943266e-81 6.508068e-12
# Keep the overall test accuracy (first entry of $overall, named "Accuracy"),
# then show the per-class statistics.
ad_tda_kde_5.60.5_n3_lr_cf0_ov_acc <-
  ad_tda_kde_5.60.5_n3_lr_cf0$overall["Accuracy"]
ad_tda_kde_5.60.5_n3_lr_cf0$byClass
## Sensitivity Specificity Pos Pred Value
## 0.9119471 0.6058673 0.8794538
## Neg Pred Value Precision Recall
## 0.6857555 0.8794538 0.9119471
## F1 Prevalence Detection Rate
## 0.8954058 0.7592138 0.6923628
## Detection Prevalence Balanced Accuracy
## 0.7872645 0.7589072
# Precision, Recall and F1 (entries 5 to 7 of $byClass)
ad_tda_kde_5.60.5_n3_lr_cf0_pre_rec_f1 <-
  ad_tda_kde_5.60.5_n3_lr_cf0$byClass[c("Precision", "Recall", "F1")]
###### Conduct initial Bayesian tests of non-TDA-assisted LR vs. TDA-assisted LR classifiers
### 3-fold diff
# Fold-wise baseline LR accuracy minus node-3 TDA-assisted LR accuracy
diff_tda_kde_5.60.5_lr_n3_3_fold <-
  ad_lr_fit_re - ad_tda_kde_5.60.5_n3_lr_fit_re
diff_tda_kde_5.60.5_lr_n3_3_fold
## Accuracy
## 1 0.015604497
## 2 0.007956103
## 3 0.016119051
## Bayesian Tests 3-fold diff
# Bayesian Sign Test on the node-3 fold differences, ROPE = [-0.01, 0.01]
bst_tda_kde_5.60.5_lr.n3_3_fold <- BayesianSignTest(
  as.matrix(diff_tda_kde_5.60.5_lr_n3_3_fold),
  -0.01,
  0.01
)
bst_tda_kde_5.60.5_lr.n3_3_fold
## $probLeft
## [1] 0
##
## $probRope
## [1] 0.5
##
## $probRight
## [1] 0.5
# Odds Left Bayesian Sign Test
# Ratio probLeft / probRight of the sign-test result; this is NaN if both
# probabilities are 0.
bst_tda_kde_5.60.5_lr.n3_3_fold_odds.left<-bst_tda_kde_5.60.5_lr.n3_3_fold$probLeft/bst_tda_kde_5.60.5_lr.n3_3_fold$probRight
bst_tda_kde_5.60.5_lr.n3_3_fold_odds.left
## [1] 0
# Bayesian Signed Rank Test on the node-3 fold differences,
# ROPE = [-0.01, 0.01]
bsr_tda_kde_5.60.5_lr.n3_3_fold <- BayesianSignedRank(
  as.matrix(diff_tda_kde_5.60.5_lr_n3_3_fold),
  -0.01,
  0.01
)
bsr_tda_kde_5.60.5_lr.n3_3_fold
## $winLeft
## [1] 0
##
## $winRope
## [1] 0.3048667
##
## $winRight
## [1] 0.6951333
# Bayesian Correlated Test on the node-3 fold differences.
# Positional arguments: 0.1 (presumably the fold correlation -- confirm against
# the definition of correlatedBayesianTtest), then the ROPE bounds.
bct_tda_kde_5.60.5_lr.n3_3_fold <- correlatedBayesianTtest(
  as.matrix(diff_tda_kde_5.60.5_lr_n3_3_fold),
  0.1,
  -0.01,
  0.01
)
bct_tda_kde_5.60.5_lr.n3_3_fold
## $left
## [1] 0.008392836
##
## $rope
## [1] 0.1919669
##
## $right
## [1] 0.7996403
# ROPE plot of the node-3 3-fold accuracy differences, ROPE = [-0.01, 0.01]
plot(
  rope(diff_tda_kde_5.60.5_lr_n3_3_fold, range = c(-0.01, 0.01))
)

#BayesFactor
#bf_tda_kde_5.60.5_lr.n3_3_fold = ttestBF(x = as.matrix(diff_tda_kde_5.60.5_lr_n3_3_fold))
#bf_tda_kde_5.60.5_lr.n3_3_fold
# One-sample t-test of the node-3 3-fold accuracy differences (the recorded
# output shows the default two-sided alternative against a true mean of 0).
t.test(as.matrix(diff_tda_kde_5.60.5_lr_n3_3_fold))
##
## One Sample t-test
##
## data: as.matrix(diff_tda_kde_5.60.5_lr_n3_3_fold)
## t = 5.0112, df = 2, p-value = 0.03759
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
## 0.001870099 0.024583001
## sample estimates:
## mean of x
## 0.01322655
### Test set diff
# Baseline LR test accuracy minus node-3 TDA-assisted LR test accuracy
# (a single value; positive means the baseline scored higher).
diff_tda_kde_5.60.5_lr.n3_test <-
  lr_cf_ov_acc - ad_tda_kde_5.60.5_n3_lr_cf0_ov_acc
diff_tda_kde_5.60.5_lr.n3_test
## Accuracy
## 0.01504914
## Bayesian Tests Test set diff
# Bayesian Sign Test on the node-3 test-set difference, ROPE = [-0.01, 0.01]
bst_tda_kde_5.60.5_lr.n3_test <- BayesianSignTest(
  as.matrix(diff_tda_kde_5.60.5_lr.n3_test),
  -0.01,
  0.01
)
bst_tda_kde_5.60.5_lr.n3_test
## $probLeft
## [1] 0
##
## $probRope
## [1] 0.5
##
## $probRight
## [1] 0.5
# Odds Left Bayesian Sign Test
# Ratio probLeft / probRight of the sign-test result; this is NaN if both
# probabilities are 0.
bst_tda_kde_5.60.5_lr.n3_test_odds.left<-bst_tda_kde_5.60.5_lr.n3_test$probLeft/bst_tda_kde_5.60.5_lr.n3_test$probRight
bst_tda_kde_5.60.5_lr.n3_test_odds.left
## [1] 0
# Bayesian Signed Rank Test on the node-3 test-set difference,
# ROPE = [-0.01, 0.01].
# The earlier version of this statement re-ran the test on the node-2
# difference and overwrote bsr_tda_kde_5.60.5_lr.n2_test, so no node-3 result
# was ever produced. The output recorded below this statement in the document
# is therefore the node-2 result and must be regenerated.
bsr_tda_kde_5.60.5_lr.n3_test <- BayesianSignedRank(
  as.matrix(diff_tda_kde_5.60.5_lr.n3_test),
  -0.01,
  0.01
)
bsr_tda_kde_5.60.5_lr.n3_test
## $winLeft
## [1] 0
##
## $winRope
## [1] 1
##
## $winRight
## [1] 0
# Bayesian Correlated Test
# Applied to the single test-set difference. In the recorded run all three
# fields ($left, $rope, $right) come back NA for this one-value input, so the
# result carries no information.
bct_tda_kde_5.60.5_lr.n3_test<-correlatedBayesianTtest(as.matrix(diff_tda_kde_5.60.5_lr.n3_test),0.1,-0.01,0.01)
bct_tda_kde_5.60.5_lr.n3_test
## $left
## [1] NA
##
## $rope
## [1] NA
##
## $right
## [1] NA
# Rope Plot
#plot(rope(diff_tda_kde_5.60.5_lr.n3_test,c(-0.01,0.01)))
#BayesFactor
#bf_tda_kde_5.60.5_lr.n3_test = ttestBF(x = as.matrix(diff_tda_kde_5.60.5_lr.n3_test))
#bf_tda_kde_5.60.5_lr.n3_test
#t_test
#t.test(as.matrix(diff_tda_kde_5.60.5_lr.n3_test))
## Node4
# Logistic regression (caret method "glm", binomial family) fitted on
# tda.m_kde_adult_5.60.5.n4.vec with the resampling scheme in fitControl,
# scored on Accuracy. The recorded run emits glm.fit non-convergence and
# rank-deficient-prediction warnings for this fit.
Adult_TDA_KDE_5.60.5_n4_LrFit0 <- train(
  as.factor(adult_df1) ~ .,
  data = tda.m_kde_adult_5.60.5.n4.vec,
  method = "glm",
  family = "binomial",
  metric = "Accuracy",
  trControl = fitControl
)
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning in predict.lm(object, newdata, se.fit, scale = 1, type = if (type == :
## prediction from rank-deficient fit; attr(*, "non-estim") has doubtful cases
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning in predict.lm(object, newdata, se.fit, scale = 1, type = if (type == :
## prediction from rank-deficient fit; attr(*, "non-estim") has doubtful cases
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning in predict.lm(object, newdata, se.fit, scale = 1, type = if (type == :
## prediction from rank-deficient fit; attr(*, "non-estim") has doubtful cases
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
Adult_TDA_KDE_5.60.5_n4_LrFit0
## Generalized Linear Model
##
## 11795 samples
## 108 predictor
## 2 classes: ' <=50K', ' >50K'
##
## No pre-processing
## Resampling: Cross-Validated (3 fold)
## Summary of sample sizes: 7864, 7863, 7863
## Resampling results:
##
## Accuracy Kappa
## 0.8459493 0.5208813
Adult_TDA_KDE_5.60.5_n4_LrFit0$resample
## Accuracy Kappa Resample
## 1 0.8176037 0.4479074 Fold1
## 2 0.8596134 0.5612620 Fold2
## 3 0.8606307 0.5534746 Fold3
# Per-fold CV accuracies of the node-4 fit (first column of caret's $resample
# table), followed by the coefficient summary.
# NOTE(review): in the recorded summary the estimates are on the order of
# 1e15 and the residual deviance exceeds the null deviance, consistent with
# the non-convergence warnings -- treat these coefficients as unreliable.
ad_tda_kde_5.60.5_n4_lr_fit_re <- Adult_TDA_KDE_5.60.5_n4_LrFit0$resample[1]
summary(Adult_TDA_KDE_5.60.5_n4_LrFit0)
##
## Call:
## NULL
##
## Coefficients: (20 not defined because of singularities)
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -7.825e+15 5.926e+07 -132041302 <2e-16 ***
## V1 2.987e+13 8.580e+04 348131951 <2e-16 ***
## V2.. 1.530e+15 4.768e+07 32094105 <2e-16 ***
## V2.Federal.gov 4.069e+15 4.767e+07 85357515 <2e-16 ***
## V2.Local.gov 7.326e+14 4.760e+07 15390105 <2e-16 ***
## V2.Never.worked 4.947e+15 6.720e+07 73620966 <2e-16 ***
## V2.Private 8.540e+14 4.751e+07 17974386 <2e-16 ***
## V2.Self.emp.inc 1.145e+15 4.767e+07 24026636 <2e-16 ***
## V2.Self.emp.not.inc 4.142e+14 4.758e+07 8704503 <2e-16 ***
## V2.State.gov 2.119e+15 4.764e+07 44487685 <2e-16 ***
## V2.Without.pay NA NA NA NA
## V3 1.027e+09 1.119e+01 91806324 <2e-16 ***
## V4.10th NA NA NA NA
## V4.11th NA NA NA NA
## V4.12th -5.304e+13 6.151e+06 -8623316 <2e-16 ***
## V4.1st.4th NA NA NA NA
## V4.5th.6th NA NA NA NA
## V4.7th.8th NA NA NA NA
## V4.9th NA NA NA NA
## V4.Assoc.acdm 1.087e+14 3.789e+06 28691297 <2e-16 ***
## V4.Assoc.voc 6.702e+13 2.708e+06 24750048 <2e-16 ***
## V4.Bachelors 6.943e+14 2.237e+06 310318700 <2e-16 ***
## V4.Doctorate NA NA NA NA
## V4.HS.grad -1.991e+14 1.530e+06 -130091327 <2e-16 ***
## V4.Masters 3.434e+15 2.142e+07 160339901 <2e-16 ***
## V4.Preschool NA NA NA NA
## V4.Prof.school NA NA NA NA
## V4.Some.college NA NA NA NA
## V5 NA NA NA NA
## V6.Divorced -7.624e+13 6.365e+06 -11977925 <2e-16 ***
## V6.Married.AF.spouse 2.675e+15 2.231e+07 119919038 <2e-16 ***
## V6.Married.civ.spouse 1.645e+15 9.689e+06 169741764 <2e-16 ***
## V6.Married.spouse.absent 1.068e+14 8.483e+06 12596312 <2e-16 ***
## V6.Never.married 1.351e+14 6.436e+06 20987413 <2e-16 ***
## V6.Separated 2.605e+13 6.978e+06 3732951 <2e-16 ***
## V6.Widowed NA NA NA NA
## V7.. NA NA NA NA
## V7.Adm.clerical 2.690e+14 3.390e+06 79342700 <2e-16 ***
## V7.Armed.Forces -3.599e+15 2.788e+07 -129072122 <2e-16 ***
## V7.Craft.repair 1.812e+14 3.247e+06 55813993 <2e-16 ***
## V7.Exec.managerial 7.603e+14 3.440e+06 220985831 <2e-16 ***
## V7.Farming.fishing -2.029e+14 5.077e+06 -39963607 <2e-16 ***
## V7.Handlers.cleaners -5.289e+13 4.118e+06 -12843922 <2e-16 ***
## V7.Machine.op.inspct 7.162e+13 3.660e+06 19571511 <2e-16 ***
## V7.Other.service 1.813e+14 3.473e+06 52198068 <2e-16 ***
## V7.Priv.house.serv 6.831e+14 1.421e+07 48065932 <2e-16 ***
## V7.Prof.specialty 5.913e+14 3.744e+06 157965286 <2e-16 ***
## V7.Protective.serv 6.847e+14 5.256e+06 130270589 <2e-16 ***
## V7.Sales 4.063e+14 3.371e+06 120523413 <2e-16 ***
## V7.Tech.support 5.056e+14 4.468e+06 113159969 <2e-16 ***
## V7.Transport.moving NA NA NA NA
## V8.Husband -4.236e+14 3.440e+06 -123126104 <2e-16 ***
## V8.Not.in.family -1.543e+14 8.026e+06 -19222478 <2e-16 ***
## V8.Other.relative -3.840e+13 8.027e+06 -4783925 <2e-16 ***
## V8.Own.child 1.478e+13 7.929e+06 1864194 <2e-16 ***
## V8.Unmarried -1.202e+14 8.146e+06 -14758509 <2e-16 ***
## V8.Wife NA NA NA NA
## V9.Amer.Indian.Eskimo -2.778e+14 6.746e+06 -41188330 <2e-16 ***
## V9.Asian.Pac.Islander 8.827e+14 5.052e+06 174723046 <2e-16 ***
## V9.Black -7.761e+13 2.163e+06 -35881645 <2e-16 ***
## V9.Other -2.786e+14 7.369e+06 -37801271 <2e-16 ***
## V9.White NA NA NA NA
## V10.Female -9.368e+13 1.860e+06 -50364131 <2e-16 ***
## V10.Male NA NA NA NA
## V11 4.594e+10 1.155e+02 397767698 <2e-16 ***
## V12 3.150e+11 1.622e+03 194264192 <2e-16 ***
## V13 1.047e+13 6.158e+04 169972735 <2e-16 ***
## V14.. 2.402e+15 3.390e+07 70845254 <2e-16 ***
## V14.Cambodia 3.161e+15 4.235e+07 74644070 <2e-16 ***
## V14.Canada 4.572e+15 3.562e+07 128352744 <2e-16 ***
## V14.China 4.577e+14 3.714e+07 12323050 <2e-16 ***
## V14.Columbia 9.928e+14 3.700e+07 26832292 <2e-16 ***
## V14.Cuba 4.683e+15 3.609e+07 129752299 <2e-16 ***
## V14.Dominican.Republic 1.277e+15 3.699e+07 34517901 <2e-16 ***
## V14.Ecuador 1.638e+15 3.942e+07 41543894 <2e-16 ***
## V14.El.Salvador 2.134e+15 3.640e+07 58630774 <2e-16 ***
## V14.England 2.967e+15 3.607e+07 82254355 <2e-16 ***
## V14.France 2.320e+15 4.117e+07 56355878 <2e-16 ***
## V14.Germany 3.456e+15 3.498e+07 98789297 <2e-16 ***
## V14.Greece 8.869e+14 3.973e+07 22320780 <2e-16 ***
## V14.Guatemala 3.833e+15 3.911e+07 98004056 <2e-16 ***
## V14.Haiti 1.077e+15 3.739e+07 28812312 <2e-16 ***
## V14.Holand.Netherlands NA NA NA NA
## V14.Honduras 5.376e+14 4.754e+07 11308063 <2e-16 ***
## V14.Hong 7.819e+14 5.843e+07 13383227 <2e-16 ***
## V14.Hungary -9.436e+14 5.130e+07 -18393229 <2e-16 ***
## V14.India 1.355e+15 3.689e+07 36743895 <2e-16 ***
## V14.Iran 4.149e+15 3.715e+07 111687386 <2e-16 ***
## V14.Ireland 4.518e+15 3.841e+07 117642930 <2e-16 ***
## V14.Italy 3.109e+15 3.664e+07 84859846 <2e-16 ***
## V14.Jamaica 3.019e+15 3.528e+07 85575023 <2e-16 ***
## V14.Japan 2.437e+15 3.677e+07 66267437 <2e-16 ***
## V14.Laos -1.103e+15 4.368e+07 -25247146 <2e-16 ***
## V14.Mexico 8.890e+14 3.432e+07 25899631 <2e-16 ***
## V14.Nicaragua 4.595e+14 3.975e+07 11559935 <2e-16 ***
## V14.Outlying.US.Guam.USVI.etc. 5.357e+15 4.752e+07 112726438 <2e-16 ***
## V14.Peru 1.346e+15 3.923e+07 34297473 <2e-16 ***
## V14.Philippines 1.926e+15 3.503e+07 54988866 <2e-16 ***
## V14.Poland 4.447e+14 3.610e+07 12318391 <2e-16 ***
## V14.Portugal 2.814e+15 4.036e+07 69724643 <2e-16 ***
## V14.Puerto.Rico 1.095e+15 3.516e+07 31153179 <2e-16 ***
## V14.Scotland -4.009e+14 4.752e+07 -8435028 <2e-16 ***
## V14.South 3.990e+14 3.592e+07 11108460 <2e-16 ***
## V14.Taiwan 4.044e+14 3.846e+07 10515566 <2e-16 ***
## V14.Thailand 2.004e+15 4.068e+07 49253826 <2e-16 ***
## V14.Trinadad.Tobago 1.742e+12 3.980e+07 43764 <2e-16 ***
## V14.United.States 1.049e+15 3.359e+07 31225629 <2e-16 ***
## V14.Vietnam 3.687e+15 3.567e+07 103368201 <2e-16 ***
## V14.Yugoslavia NA NA NA NA
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 12245 on 11794 degrees of freedom
## Residual deviance: 175821 on 11706 degrees of freedom
## AIC: 175999
##
## Number of Fisher Scoring iterations: 25
# Variable-importance plot for the node-4 LR fit (up to 50 features)
vip(Adult_TDA_KDE_5.60.5_n4_LrFit0, num_features = 50) +
  ggtitle("Adult_TDA_KDE_5.60.5_n4_Lr1Fit TDA-Assited LR")

# Class predictions of the node-4 LR fit on the held-out test set.
# pred0 is a scratch name that is reassigned for every node's model.
pred0 <- predict(
  Adult_TDA_KDE_5.60.5_n4_LrFit0,
  newdata = adult.one_hot_df4Test
)
## Warning in predict.lm(object, newdata, se.fit, scale = 1, type = if (type == :
## prediction from rank-deficient fit; attr(*, "non-estim") has doubtful cases
# Confusion matrix of the node-4 predictions against the true test labels
ad_tda_kde_5.60.5_n4_lr_cf0 <- confusionMatrix(
  data = pred0,
  reference = as.factor(adult.one_hot_df4Test$adult_df1)
)
ad_tda_kde_5.60.5_n4_lr_cf0
## Confusion Matrix and Statistics
##
## Reference
## Prediction <=50K >50K
## <=50K 7143 1859
## >50K 273 493
##
## Accuracy : 0.7817
## 95% CI : (0.7734, 0.7899)
## No Information Rate : 0.7592
## P-Value [Acc > NIR] : 7.595e-08
##
## Kappa : 0.2245
##
## Mcnemar's Test P-Value : < 2.2e-16
##
## Sensitivity : 0.9632
## Specificity : 0.2096
## Pos Pred Value : 0.7935
## Neg Pred Value : 0.6436
## Prevalence : 0.7592
## Detection Rate : 0.7313
## Detection Prevalence : 0.9216
## Balanced Accuracy : 0.5864
##
## 'Positive' Class : <=50K
##
ad_tda_kde_5.60.5_n4_lr_cf0
## Confusion Matrix and Statistics
##
## Reference
## Prediction <=50K >50K
## <=50K 7143 1859
## >50K 273 493
##
## Accuracy : 0.7817
## 95% CI : (0.7734, 0.7899)
## No Information Rate : 0.7592
## P-Value [Acc > NIR] : 7.595e-08
##
## Kappa : 0.2245
##
## Mcnemar's Test P-Value : < 2.2e-16
##
## Sensitivity : 0.9632
## Specificity : 0.2096
## Pos Pred Value : 0.7935
## Neg Pred Value : 0.6436
## Prevalence : 0.7592
## Detection Rate : 0.7313
## Detection Prevalence : 0.9216
## Balanced Accuracy : 0.5864
##
## 'Positive' Class : <=50K
##
ad_tda_kde_5.60.5_n4_lr_cf0$overall
## Accuracy Kappa AccuracyLower AccuracyUpper AccuracyNull
## 7.817363e-01 2.244778e-01 7.734102e-01 7.898941e-01 7.592138e-01
## AccuracyPValue McnemarPValue
## 7.595150e-08 3.106255e-258
# Keep the overall test accuracy (first entry of $overall, named "Accuracy"),
# then show the per-class statistics.
ad_tda_kde_5.60.5_n4_lr_cf0_ov_acc <-
  ad_tda_kde_5.60.5_n4_lr_cf0$overall["Accuracy"]
ad_tda_kde_5.60.5_n4_lr_cf0$byClass
## Sensitivity Specificity Pos Pred Value
## 0.9631877 0.2096088 0.7934903
## Neg Pred Value Precision Recall
## 0.6436031 0.7934903 0.9631877
## F1 Prevalence Detection Rate
## 0.8701425 0.7592138 0.7312654
## Detection Prevalence Balanced Accuracy
## 0.9215807 0.5863983
# Precision, Recall and F1 (entries 5 to 7 of $byClass)
ad_tda_kde_5.60.5_n4_lr_cf0_pre_rec_f1 <-
  ad_tda_kde_5.60.5_n4_lr_cf0$byClass[c("Precision", "Recall", "F1")]
###### Conduct initial Bayesian tests of non-TDA-assisted LR vs. TDA-assisted LR classifiers
### 3-fold diff
# Fold-wise baseline LR accuracy minus node-4 TDA-assisted LR accuracy
diff_tda_kde_5.60.5_lr_n4_3_fold <-
  ad_lr_fit_re - ad_tda_kde_5.60.5_n4_lr_fit_re
diff_tda_kde_5.60.5_lr_n4_3_fold
## Accuracy
## 1 0.032093626
## 2 -0.011383864
## 3 -0.005274049
## Bayesian Tests 3-fold diff
# Bayesian Sign Test on the node-4 fold differences, ROPE = [-0.01, 0.01]
bst_tda_kde_5.60.5_lr.n4_3_fold <- BayesianSignTest(
  as.matrix(diff_tda_kde_5.60.5_lr_n4_3_fold),
  -0.01,
  0.01
)
bst_tda_kde_5.60.5_lr.n4_3_fold
## $probLeft
## [1] 0.25
##
## $probRope
## [1] 0.5
##
## $probRight
## [1] 0.25
# Odds Left Bayesian Sign Test
# Ratio probLeft / probRight of the sign-test result; this is NaN if both
# probabilities are 0.
bst_tda_kde_5.60.5_lr.n4_3_fold_odds.left<-bst_tda_kde_5.60.5_lr.n4_3_fold$probLeft/bst_tda_kde_5.60.5_lr.n4_3_fold$probRight
bst_tda_kde_5.60.5_lr.n4_3_fold_odds.left
## [1] 1
# Bayesian Signed Rank Test on the node-4 fold differences,
# ROPE = [-0.01, 0.01]
bsr_tda_kde_5.60.5_lr.n4_3_fold <- BayesianSignedRank(
  as.matrix(diff_tda_kde_5.60.5_lr_n4_3_fold),
  -0.01,
  0.01
)
bsr_tda_kde_5.60.5_lr.n4_3_fold
## $winLeft
## [1] 0.0747
##
## $winRope
## [1] 0.4485
##
## $winRight
## [1] 0.4768
# Bayesian Correlated Test on the node-4 fold differences.
# Positional arguments: 0.1 (presumably the fold correlation -- confirm against
# the definition of correlatedBayesianTtest), then the ROPE bounds.
bct_tda_kde_5.60.5_lr.n4_3_fold <- correlatedBayesianTtest(
  as.matrix(diff_tda_kde_5.60.5_lr_n4_3_fold),
  0.1,
  -0.01,
  0.01
)
bct_tda_kde_5.60.5_lr.n4_3_fold
## $left
## [1] 0.2181407
##
## $rope
## [1] 0.388718
##
## $right
## [1] 0.3931413
# ROPE plot of the node-4 3-fold accuracy differences, ROPE = [-0.01, 0.01]
plot(
  rope(diff_tda_kde_5.60.5_lr_n4_3_fold, range = c(-0.01, 0.01))
)

#BayesFactor
#bf_tda_kde_5.60.5_lr.n4_3_fold = ttestBF(x = as.matrix(diff_tda_kde_5.60.5_lr_n4_3_fold))
#bf_tda_kde_5.60.5_lr.n4_3_fold
# One-sample t-test of the node-4 3-fold accuracy differences (the recorded
# output shows the default two-sided alternative against a true mean of 0).
t.test(as.matrix(diff_tda_kde_5.60.5_lr_n4_3_fold))
##
## One Sample t-test
##
## data: as.matrix(diff_tda_kde_5.60.5_lr_n4_3_fold)
## t = 0.37863, df = 2, p-value = 0.7414
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
## -0.05332411 0.06361459
## sample estimates:
## mean of x
## 0.005145237
### Test set diff
# Baseline LR test accuracy minus node-4 TDA-assisted LR test accuracy
# (a single value; positive means the baseline scored higher).
diff_tda_kde_5.60.5_lr.n4_test <-
  lr_cf_ov_acc - ad_tda_kde_5.60.5_n4_lr_cf0_ov_acc
diff_tda_kde_5.60.5_lr.n4_test
## Accuracy
## 0.0715602
## Bayesian Tests Test set diff
# Bayesian Sign Test on the node-4 test-set difference, ROPE = [-0.01, 0.01]
bst_tda_kde_5.60.5_lr.n4_test <- BayesianSignTest(
  as.matrix(diff_tda_kde_5.60.5_lr.n4_test),
  -0.01,
  0.01
)
bst_tda_kde_5.60.5_lr.n4_test
## $probLeft
## [1] 0
##
## $probRope
## [1] 0.5
##
## $probRight
## [1] 0.5
# Odds Left Bayesian Sign Test
# Ratio probLeft / probRight of the sign-test result; this is NaN if both
# probabilities are 0.
bst_tda_kde_5.60.5_lr.n4_test_odds.left<-bst_tda_kde_5.60.5_lr.n4_test$probLeft/bst_tda_kde_5.60.5_lr.n4_test$probRight
bst_tda_kde_5.60.5_lr.n4_test_odds.left
## [1] 0
# Bayesian Signed Rank Test on the node-4 test-set difference,
# ROPE = [-0.01, 0.01]
bsr_tda_kde_5.60.5_lr.n4_test <- BayesianSignedRank(
  as.matrix(diff_tda_kde_5.60.5_lr.n4_test),
  -0.01,
  0.01
)
bsr_tda_kde_5.60.5_lr.n4_test
## $winLeft
## [1] 0
##
## $winRope
## [1] 0.1543667
##
## $winRight
## [1] 0.8456333
# Bayesian Correlated Test
# Applied to the single test-set difference. In the recorded run all three
# fields ($left, $rope, $right) come back NA for this one-value input, so the
# result carries no information.
bct_tda_kde_5.60.5_lr.n4_test<-correlatedBayesianTtest(as.matrix(diff_tda_kde_5.60.5_lr.n4_test),0.1,-0.01,0.01)
bct_tda_kde_5.60.5_lr.n4_test
## $left
## [1] NA
##
## $rope
## [1] NA
##
## $right
## [1] NA
# Rope Plot
#plot(rope(diff_tda_kde_5.60.5_lr.n4_test,c(-0.01,0.01)))
#BayesFactor
#bf_tda_kde_5.60.5_lr.n4_test = ttestBF(x = as.matrix(diff_tda_kde_5.60.5_lr.n4_test))
#bf_tda_kde_5.60.5_lr.n4_test
#t_test
#t.test(as.matrix(diff_tda_kde_5.60.5_lr.n4_test))
##Node5
# Fit the TDA-assisted logistic regression (binomial GLM) on the rows in
# tda.m_kde_adult_5.60.5.n5.vec, using every other column as a predictor of
# adult_df1. Resampling is controlled by fitControl, defined earlier in the
# file; the recorded output reports 3-fold cross-validation.
# NOTE: the recorded run emits "did not converge", "fitted probabilities
# numerically 0 or 1" and rank-deficient-fit warnings for this model, so the
# coefficient table below should be read with caution.
Adult_TDA_KDE_5.60.5_n5_LrFit0 <- train(
  as.factor(adult_df1) ~ .,
  data = tda.m_kde_adult_5.60.5.n5.vec,
  method = "glm",
  family = "binomial",
  metric = "Accuracy",
  trControl = fitControl
)
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning in predict.lm(object, newdata, se.fit, scale = 1, type = if (type == :
## prediction from rank-deficient fit; attr(*, "non-estim") has doubtful cases
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning in predict.lm(object, newdata, se.fit, scale = 1, type = if (type == :
## prediction from rank-deficient fit; attr(*, "non-estim") has doubtful cases
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning in predict.lm(object, newdata, se.fit, scale = 1, type = if (type == :
## prediction from rank-deficient fit; attr(*, "non-estim") has doubtful cases
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
Adult_TDA_KDE_5.60.5_n5_LrFit0
## Generalized Linear Model
##
## 8940 samples
## 108 predictor
## 2 classes: ' <=50K', ' >50K'
##
## No pre-processing
## Resampling: Cross-Validated (3 fold)
## Summary of sample sizes: 5960, 5960, 5960
## Resampling results:
##
## Accuracy Kappa
## 0.8709172 0.4072427
Adult_TDA_KDE_5.60.5_n5_LrFit0$resample
## Accuracy Kappa Resample
## 1 0.8674497 0.4000571 Fold1
## 2 0.8734899 0.4200898 Fold2
## 3 0.8718121 0.4015813 Fold3
ad_tda_kde_5.60.5_n5_lr_fit_re<-Adult_TDA_KDE_5.60.5_n5_LrFit0$resample[1]
summary(Adult_TDA_KDE_5.60.5_n5_LrFit0)
##
## Call:
## NULL
##
## Coefficients: (22 not defined because of singularities)
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -1.404e+13 1.235e+13 -1.137 0.255591
## V1 7.283e-02 5.831e-03 12.490 < 2e-16 ***
## V2.. 1.404e+13 1.235e+13 1.137 0.255591
## V2.Federal.gov 1.404e+13 1.235e+13 1.137 0.255591
## V2.Local.gov 1.404e+13 1.235e+13 1.137 0.255591
## V2.Never.worked 1.404e+13 1.235e+13 1.137 0.255591
## V2.Private 1.404e+13 1.235e+13 1.137 0.255591
## V2.Self.emp.inc 1.404e+13 1.235e+13 1.137 0.255591
## V2.Self.emp.not.inc 1.404e+13 1.235e+13 1.137 0.255591
## V2.State.gov 1.404e+13 1.235e+13 1.137 0.255591
## V2.Without.pay 1.404e+13 1.235e+13 1.137 0.255591
## V3 1.649e-06 8.285e-07 1.990 0.046598 *
## V4.10th NA NA NA NA
## V4.11th NA NA NA NA
## V4.12th -1.403e+00 1.046e+00 -1.341 0.179793
## V4.1st.4th NA NA NA NA
## V4.5th.6th NA NA NA NA
## V4.7th.8th NA NA NA NA
## V4.9th NA NA NA NA
## V4.Assoc.acdm NA NA NA NA
## V4.Assoc.voc 2.923e-01 1.534e-01 1.906 0.056690 .
## V4.Bachelors NA NA NA NA
## V4.Doctorate NA NA NA NA
## V4.HS.grad -3.118e-01 7.937e-02 -3.929 8.53e-05 ***
## V4.Masters NA NA NA NA
## V4.Preschool NA NA NA NA
## V4.Prof.school NA NA NA NA
## V4.Some.college NA NA NA NA
## V5 NA NA NA NA
## V6.Divorced -4.585e-01 7.803e-01 -0.588 0.556833
## V6.Married.AF.spouse 2.300e+00 1.278e+00 1.800 0.071891 .
## V6.Married.civ.spouse 2.139e+00 9.042e-01 2.366 0.017983 *
## V6.Married.spouse.absent -6.342e-01 1.078e+00 -0.588 0.556497
## V6.Never.married -7.738e-01 7.871e-01 -0.983 0.325538
## V6.Separated -6.393e-01 8.631e-01 -0.741 0.458897
## V6.Widowed NA NA NA NA
## V7.. NA NA NA NA
## V7.Adm.clerical 3.840e-01 1.895e-01 2.026 0.042791 *
## V7.Armed.Forces -2.343e+01 1.285e+05 0.000 0.999855
## V7.Craft.repair 3.018e-01 1.525e-01 1.978 0.047873 *
## V7.Exec.managerial 8.867e-01 1.687e-01 5.255 1.48e-07 ***
## V7.Farming.fishing -2.974e-01 2.921e-01 -1.018 0.308570
## V7.Handlers.cleaners -4.953e-01 2.509e-01 -1.975 0.048315 *
## V7.Machine.op.inspct 1.014e-01 1.823e-01 0.556 0.578245
## V7.Other.service -4.206e-01 2.309e-01 -1.822 0.068522 .
## V7.Priv.house.serv -2.354e+01 6.361e+04 0.000 0.999705
## V7.Prof.specialty 1.081e+00 2.157e-01 5.012 5.38e-07 ***
## V7.Protective.serv 8.696e-01 2.517e-01 3.455 0.000551 ***
## V7.Sales 4.338e-01 1.710e-01 2.537 0.011180 *
## V7.Tech.support 1.195e+00 2.385e-01 5.012 5.39e-07 ***
## V7.Transport.moving NA NA NA NA
## V8.Husband -1.355e+00 2.669e-01 -5.076 3.85e-07 ***
## V8.Not.in.family -7.169e-01 5.348e-01 -1.340 0.180123
## V8.Other.relative -1.711e+00 5.354e-01 -3.195 0.001397 **
## V8.Own.child -1.679e+00 4.963e-01 -3.384 0.000715 ***
## V8.Unmarried -1.372e+00 5.799e-01 -2.365 0.018010 *
## V8.Wife NA NA NA NA
## V9.Amer.Indian.Eskimo -1.345e+00 5.176e-01 -2.598 0.009386 **
## V9.Asian.Pac.Islander 1.945e-01 4.062e-01 0.479 0.632063
## V9.Black -5.195e-01 1.794e-01 -2.895 0.003793 **
## V9.Other -1.652e+00 8.126e-01 -2.033 0.042062 *
## V9.White NA NA NA NA
## V10.Female -8.196e-01 2.329e-01 -3.520 0.000432 ***
## V10.Male NA NA NA NA
## V11 3.571e-04 2.342e-05 15.246 < 2e-16 ***
## V12 5.365e-04 8.044e-05 6.669 2.57e-11 ***
## V13 3.017e-02 3.756e-03 8.033 9.51e-16 ***
## V14.. -1.543e+00 1.401e+00 -1.101 0.270824
## V14.Cambodia -8.429e-01 1.956e+00 -0.431 0.666546
## V14.Canada -8.436e-01 1.503e+00 -0.561 0.574696
## V14.China -1.091e+00 1.679e+00 -0.650 0.515592
## V14.Columbia -2.559e+01 7.863e+04 0.000 0.999740
## V14.Cuba 4.458e-01 1.492e+00 0.299 0.765048
## V14.Dominican.Republic -1.675e+01 1.001e+03 -0.017 0.986647
## V14.Ecuador -3.927e-01 1.631e+00 -0.241 0.809662
## V14.El.Salvador -2.857e+00 1.753e+00 -1.629 0.103224
## V14.England -7.067e-01 1.627e+00 -0.434 0.664082
## V14.France -2.323e+01 1.530e+05 0.000 0.999879
## V14.Germany -1.102e+00 1.462e+00 -0.754 0.451064
## V14.Greece -3.384e+00 1.891e+00 -1.789 0.073559 .
## V14.Guatemala -1.475e+00 1.827e+00 -0.808 0.419263
## V14.Haiti -2.485e+01 8.912e+04 0.000 0.999778
## V14.Holand.Netherlands NA NA NA NA
## V14.Honduras -2.423e+01 1.893e+05 0.000 0.999898
## V14.Hong -2.640e+01 1.333e+05 0.000 0.999842
## V14.Hungary -2.693e+01 2.282e+05 0.000 0.999906
## V14.India -2.796e+00 2.010e+00 -1.391 0.164229
## V14.Iran 6.867e-01 1.625e+00 0.423 0.672495
## V14.Ireland -1.407e+00 1.763e+00 -0.798 0.424838
## V14.Italy -2.642e+00 1.853e+00 -1.426 0.153821
## V14.Jamaica -3.224e-01 1.534e+00 -0.210 0.833549
## V14.Japan -2.593e+01 8.401e+04 0.000 0.999754
## V14.Laos -2.642e+01 1.500e+05 0.000 0.999859
## V14.Mexico -1.844e+00 1.454e+00 -1.268 0.204749
## V14.Nicaragua -2.524e+01 9.654e+04 0.000 0.999791
## V14.Outlying.US.Guam.USVI.etc. -2.408e+01 1.214e+05 0.000 0.999842
## V14.Peru -2.565e+01 1.102e+05 0.000 0.999814
## V14.Philippines -8.706e-01 1.538e+00 -0.566 0.571416
## V14.Poland -2.328e+00 1.750e+00 -1.330 0.183443
## V14.Portugal -8.527e-01 1.652e+00 -0.516 0.605716
## V14.Puerto.Rico -2.147e+00 1.591e+00 -1.349 0.177356
## V14.Scotland -2.647e+01 1.802e+05 0.000 0.999883
## V14.South -1.774e+00 1.684e+00 -1.053 0.292268
## V14.Taiwan -2.328e+00 1.917e+00 -1.215 0.224526
## V14.Thailand -1.935e+00 2.031e+00 -0.953 0.340831
## V14.Trinadad.Tobago -2.633e+01 1.455e+05 0.000 0.999856
## V14.United.States -1.167e+00 1.359e+00 -0.859 0.390520
## V14.Vietnam -1.930e+00 1.809e+00 -1.067 0.286045
## V14.Yugoslavia NA NA NA NA
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 7692.0 on 8939 degrees of freedom
## Residual deviance: 4776.9 on 8853 degrees of freedom
## AIC: 4950.9
##
## Number of Fisher Scoring iterations: 25
vip(Adult_TDA_KDE_5.60.5_n5_LrFit0,50) + ggtitle("Adult_TDA_KDE_5.60.5_n5_Lr1Fit TDA-Assited LR")

# Predict outcome using Adult_TDA_KDE_5.60.5_n5_LrFit0 from training data based on testing data
pred0 <- predict(Adult_TDA_KDE_5.60.5_n5_LrFit0, newdata= adult.one_hot_df4Test)
## Warning in predict.lm(object, newdata, se.fit, scale = 1, type = if (type == :
## prediction from rank-deficient fit; attr(*, "non-estim") has doubtful cases
# Create confusion matrix to assess model fit/performance on test data
ad_tda_kde_5.60.5_n5_lr_cf0<-confusionMatrix(data=pred0, as.factor(adult.one_hot_df4Test$adult_df1))
ad_tda_kde_5.60.5_n5_lr_cf0
## Confusion Matrix and Statistics
##
## Reference
## Prediction <=50K >50K
## <=50K 6568 911
## >50K 848 1441
##
## Accuracy : 0.8199
## 95% CI : (0.8122, 0.8275)
## No Information Rate : 0.7592
## P-Value [Acc > NIR] : <2e-16
##
## Kappa : 0.5029
##
## Mcnemar's Test P-Value : 0.1393
##
## Sensitivity : 0.8857
## Specificity : 0.6127
## Pos Pred Value : 0.8782
## Neg Pred Value : 0.6295
## Prevalence : 0.7592
## Detection Rate : 0.6724
## Detection Prevalence : 0.7657
## Balanced Accuracy : 0.7492
##
## 'Positive' Class : <=50K
##
ad_tda_kde_5.60.5_n5_lr_cf0
## Confusion Matrix and Statistics
##
## Reference
## Prediction <=50K >50K
## <=50K 6568 911
## >50K 848 1441
##
## Accuracy : 0.8199
## 95% CI : (0.8122, 0.8275)
## No Information Rate : 0.7592
## P-Value [Acc > NIR] : <2e-16
##
## Kappa : 0.5029
##
## Mcnemar's Test P-Value : 0.1393
##
## Sensitivity : 0.8857
## Specificity : 0.6127
## Pos Pred Value : 0.8782
## Neg Pred Value : 0.6295
## Prevalence : 0.7592
## Detection Rate : 0.6724
## Detection Prevalence : 0.7657
## Balanced Accuracy : 0.7492
##
## 'Positive' Class : <=50K
##
ad_tda_kde_5.60.5_n5_lr_cf0$overall
## Accuracy Kappa AccuracyLower AccuracyUpper AccuracyNull
## 8.199222e-01 5.029221e-01 8.121559e-01 8.274973e-01 7.592138e-01
## AccuracyPValue McnemarPValue
## 7.911664e-48 1.393310e-01
ad_tda_kde_5.60.5_n5_lr_cf0_ov_acc<-ad_tda_kde_5.60.5_n5_lr_cf0$overall[1]
ad_tda_kde_5.60.5_n5_lr_cf0$byClass
## Sensitivity Specificity Pos Pred Value
## 0.8856526 0.6126701 0.8781923
## Neg Pred Value Precision Recall
## 0.6295325 0.8781923 0.8856526
## F1 Prevalence Detection Rate
## 0.8819067 0.7592138 0.6723997
## Detection Prevalence Balanced Accuracy
## 0.7656634 0.7491614
ad_tda_kde_5.60.5_n5_lr_cf0_pre_rec_f1<-ad_tda_kde_5.60.5_n5_lr_cf0$byClass[5:7]
###### Conduct initial Bayesian tests of non-tda-assisted LR vs. tda-assisted LR classifiers
### 3-fold diff
diff_tda_kde_5.60.5_lr_n5_3_fold<-(ad_lr_fit_re - ad_tda_kde_5.60.5_n5_lr_fit_re)
diff_tda_kde_5.60.5_lr_n5_3_fold
## Accuracy
## 1 -0.01775238
## 2 -0.02526037
## 3 -0.01645541
## Bayesian Tests 3-fold diff
# Bayesian Sign Test
bst_tda_kde_5.60.5_lr.n5_3_fold<-BayesianSignTest(as.matrix(diff_tda_kde_5.60.5_lr_n5_3_fold),-0.01,0.01)
bst_tda_kde_5.60.5_lr.n5_3_fold
## $probLeft
## [1] 0.75
##
## $probRope
## [1] 0.25
##
## $probRight
## [1] 0
# Odds Left Bayesian Sign Test: ratio of probLeft to probRight.
# NOTE: probRight is 0 in the recorded run, so this ratio evaluates to Inf
# rather than a finite odds value; handle Inf explicitly wherever this
# variable is consumed.
bst_tda_kde_5.60.5_lr.n5_3_fold_odds.left<-bst_tda_kde_5.60.5_lr.n5_3_fold$probLeft/bst_tda_kde_5.60.5_lr.n5_3_fold$probRight
bst_tda_kde_5.60.5_lr.n5_3_fold_odds.left
## [1] Inf
# Bayesian Signed Rank Test
bsr_tda_kde_5.60.5_lr.n5_3_fold<-BayesianSignedRank(as.matrix(diff_tda_kde_5.60.5_lr_n5_3_fold),-0.01,0.01)
bsr_tda_kde_5.60.5_lr.n5_3_fold
## $winLeft
## [1] 0.9106
##
## $winRope
## [1] 0.0894
##
## $winRight
## [1] 0
# Bayesian Correlated Test
bct_tda_kde_5.60.5_lr.n5_3_fold<-correlatedBayesianTtest(as.matrix(diff_tda_kde_5.60.5_lr_n5_3_fold),0.1,-0.01,0.01)
bct_tda_kde_5.60.5_lr.n5_3_fold
## $left
## [1] 0.9548888
##
## $rope
## [1] 0.03955913
##
## $right
## [1] 0.005552066
# Rope Plot
plot(rope(diff_tda_kde_5.60.5_lr_n5_3_fold,c(-0.01,0.01)))

#BayesFactor
#bf_tda_kde_5.60.5_lr.n5_3_fold = ttestBF(x = as.matrix(diff_tda_kde_5.60.5_lr_n5_3_fold))
#bf_tda_kde_5.60.5_lr.n5_3_fold
#t_test
t.test(as.matrix(diff_tda_kde_5.60.5_lr_n5_3_fold))
##
## One Sample t-test
##
## data: as.matrix(diff_tda_kde_5.60.5_lr_n5_3_fold)
## t = -7.2227, df = 2, p-value = 0.01863
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
## -0.031631277 -0.008014158
## sample estimates:
## mean of x
## -0.01982272
### Test set diff
diff_tda_kde_5.60.5_lr.n5_test<-(lr_cf_ov_acc - ad_tda_kde_5.60.5_n5_lr_cf0_ov_acc)
diff_tda_kde_5.60.5_lr.n5_test
## Accuracy
## 0.03337428
## Bayesian Tests Test set diff
# Bayesian Sign Test
bst_tda_kde_5.60.5_lr.n5_test<-BayesianSignTest(as.matrix(diff_tda_kde_5.60.5_lr.n5_test),-0.01,0.01)
bst_tda_kde_5.60.5_lr.n5_test
## $probLeft
## [1] 0
##
## $probRope
## [1] 0.5
##
## $probRight
## [1] 0.5
# Odds Left Bayesian Sign Test
bst_tda_kde_5.60.5_lr.n5_test_odds.left<-bst_tda_kde_5.60.5_lr.n5_test$probLeft/bst_tda_kde_5.60.5_lr.n5_test$probRight
bst_tda_kde_5.60.5_lr.n5_test_odds.left
## [1] 0
# Bayesian Signed Rank Test
bsr_tda_kde_5.60.5_lr.n5_test<-BayesianSignedRank(as.matrix(diff_tda_kde_5.60.5_lr.n5_test),-0.01,0.01)
bsr_tda_kde_5.60.5_lr.n5_test
## $winLeft
## [1] 0
##
## $winRope
## [1] 0.1586
##
## $winRight
## [1] 0.8414
# Bayesian Correlated Test on the test-set accuracy difference.
# The trailing -0.01 / 0.01 arguments are the same ROPE bounds used by the
# other Bayesian tests in this section; 0.1 is presumably the correlation
# argument of correlatedBayesianTtest -- TODO confirm against its definition.
# NOTE(review): the recorded run returns NA for $left, $rope and $right here.
# The input is a single test-set difference, so presumably no variance can be
# estimated -- confirm, and do not interpret this result.
bct_tda_kde_5.60.5_lr.n5_test<-correlatedBayesianTtest(as.matrix(diff_tda_kde_5.60.5_lr.n5_test),0.1,-0.01,0.01)
bct_tda_kde_5.60.5_lr.n5_test
## $left
## [1] NA
##
## $rope
## [1] NA
##
## $right
## [1] NA
# Rope Plot
#plot(rope(diff_tda_kde_5.60.5_lr.n5_test,c(-0.01,0.01)))
#BayesFactor
#bf_tda_kde_5.60.5_lr.n5_test = ttestBF(x = as.matrix(diff_tda_kde_5.60.5_lr.n5_test))
#bf_tda_kde_5.60.5_lr.n5_test
#t_test
#t.test(as.matrix(diff_tda_kde_5.60.5_lr.n5_test))
#naiveBayes
# Baseline (non-TDA) naive Bayes classifier: predict adult_df1 from every
# other column of the full training split. Resampling is controlled by
# fitControl, defined earlier in the file.
# NOTE: in the recorded run every usekernel = FALSE fit fails with a
# "Zero variances for at least one class" error, so only the
# usekernel = TRUE candidate has resampled accuracy and is selected.
adultNbFit <- train(
  as.factor(adult_df1) ~ .,
  data = adult.one_hot_df4Train,
  method = "nb",
  metric = "Accuracy",
  trControl = fitControl
)
## Warning: model fit failed for Fold1: usekernel=FALSE, fL=0, adjust=1 Error in NaiveBayes.default(x, y, usekernel = FALSE, fL = param$fL, ...) :
## Zero variances for at least one class in variables: V2.Never.worked, V2.Without.pay, V4.Preschool, V7.Armed.Forces, V7.Priv.house.serv, V14.Haiti, V14.Holand.Netherlands, V14.Hungary, V14.Nicaragua, V14.Outlying.US.Guam.USVI.etc.
## Warning: model fit failed for Fold2: usekernel=FALSE, fL=0, adjust=1 Error in NaiveBayes.default(x, y, usekernel = FALSE, fL = param$fL, ...) :
## Zero variances for at least one class in variables: V2.Never.worked, V2.Without.pay, V4.Preschool, V14.Guatemala, V14.Holand.Netherlands, V14.Nicaragua, V14.Outlying.US.Guam.USVI.etc., V14.Peru
## Warning: model fit failed for Fold3: usekernel=FALSE, fL=0, adjust=1 Error in NaiveBayes.default(x, y, usekernel = FALSE, fL = param$fL, ...) :
## Zero variances for at least one class in variables: V2.Never.worked, V2.Without.pay, V4.Preschool, V14.Dominican.Republic, V14.Ecuador, V14.Holand.Netherlands, V14.Honduras, V14.Laos, V14.Nicaragua, V14.Outlying.US.Guam.USVI.etc.
## Warning in nominalTrainWorkflow(x = x, y = y, wts = weights, info = trainInfo,
## : There were missing values in resampled performance measures.
## Warning in train.default(x, y, weights = w, ...): missing values found in
## aggregated results
adultNbFit
## Naive Bayes
##
## 22793 samples
## 108 predictor
## 2 classes: ' <=50K', ' >50K'
##
## No pre-processing
## Resampling: Cross-Validated (3 fold)
## Summary of sample sizes: 15195, 15196, 15195
## Resampling results across tuning parameters:
##
## usekernel Accuracy Kappa
## FALSE NaN NaN
## TRUE 0.7648843 0.03503428
##
## Tuning parameter 'fL' was held constant at a value of 0
## Tuning
## parameter 'adjust' was held constant at a value of 1
## Accuracy was used to select the optimal model using the largest value.
## The final values used for the model were fL = 0, usekernel = TRUE and adjust
## = 1.
adultNbFit$resample
## Accuracy Kappa Resample
## 1 0.7591471 0.00000000 Fold1
## 2 0.7735948 0.08778280 Fold2
## 3 0.7619110 0.01732004 Fold3
ad_nb_fit_re<-adultNbFit$resample[1]
summary(adultNbFit)
## Length Class Mode
## apriori 2 table numeric
## tables 108 -none- list
## levels 2 -none- character
## call 6 -none- call
## x 108 data.frame list
## usekernel 1 -none- logical
## varnames 108 -none- character
## xNames 108 -none- character
## problemType 1 -none- character
## tuneValue 3 data.frame list
## obsLevels 2 -none- character
## param 0 -none- list
#varImp (adultNbFit)
# Score the held-out test split with the baseline naive Bayes model.
predictions <- predict(adultNbFit, newdata = adult.one_hot_df4Test)
# Cross-tabulate predicted against observed income class on the test split,
# then print the confusion matrix and its summary statistics.
nb_cf <- confusionMatrix(
  data = predictions,
  reference = as.factor(adult.one_hot_df4Test$adult_df1)
)
nb_cf
## Confusion Matrix and Statistics
##
## Reference
## Prediction <=50K >50K
## <=50K 7416 2219
## >50K 0 133
##
## Accuracy : 0.7728
## 95% CI : (0.7644, 0.7811)
## No Information Rate : 0.7592
## P-Value [Acc > NIR] : 0.0008047
##
## Kappa : 0.0834
##
## Mcnemar's Test P-Value : < 2.2e-16
##
## Sensitivity : 1.00000
## Specificity : 0.05655
## Pos Pred Value : 0.76969
## Neg Pred Value : 1.00000
## Prevalence : 0.75921
## Detection Rate : 0.75921
## Detection Prevalence : 0.98638
## Balanced Accuracy : 0.52827
##
## 'Positive' Class : <=50K
##
nb_cf$overall
## Accuracy Kappa AccuracyLower AccuracyUpper AccuracyNull
## 0.772829648 0.083417992 0.764388511 0.781107809 0.759213759
## AccuracyPValue McnemarPValue
## 0.000804745 0.000000000
nb_cf_ov_acc<-nb_cf$overall[1]
nb_cf$byClass
## Sensitivity Specificity Pos Pred Value
## 1.00000000 0.05654762 0.76969382
## Neg Pred Value Precision Recall
## 1.00000000 0.76969382 1.00000000
## F1 Prevalence Detection Rate
## 0.86986101 0.75921376 0.75921376
## Detection Prevalence Balanced Accuracy
## 0.98638411 0.52827381
nb_cf_pre_rec_f1<-nb_cf$byClass[5:7]
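##With TDA PCA filter 5 intervals, 60% overlap, 5 bins (NOTE(review): this comment previously said 50% overlap, but every object name here uses 5.60.5 -- confirm)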
##Node1
# TDA-assisted naive Bayes for PCA-filter node 1: predict adult_df1 from every
# other column of tda.m_adult_5.60.5.n1.vec. Resampling is controlled by
# fitControl, defined earlier in the file.
# NOTE: in the recorded run every usekernel = FALSE fit fails with a
# "Zero variances for at least one class" error, and the selected
# usekernel = TRUE model has Kappa 0 in all three folds.
Adult_TDA_PC_5.60.5_n1_NbFit0 <- train(
  as.factor(adult_df1) ~ .,
  data = tda.m_adult_5.60.5.n1.vec,
  method = "nb",
  metric = "Accuracy",
  trControl = fitControl
)
## Warning: model fit failed for Fold1: usekernel=FALSE, fL=0, adjust=1 Error in NaiveBayes.default(x, y, usekernel = FALSE, fL = param$fL, ...) :
## Zero variances for at least one class in variables: V2.Never.worked, V2.Without.pay, V4.11th, V4.12th, V4.1st.4th, V4.5th.6th, V4.Preschool, V6.Divorced, V6.Married.AF.spouse, V6.Married.civ.spouse, V6.Married.spouse.absent, V6.Never.married, V6.Separated, V6.Widowed, V7.Armed.Forces, V7.Handlers.cleaners, V7.Priv.house.serv, V8.Husband, V8.Not.in.family, V8.Other.relative, V8.Own.child, V8.Unmarried, V8.Wife, V9.Amer.Indian.Eskimo, V9.Other, V10.Female, V10.Male, V14.Cambodia, V14.Columbia, V14.Dominican.Republic, V14.Ecuador, V14.El.Salvador, V14.Guatemala, V14.Haiti, V14.Holand.Netherlands, V14.Honduras, V14.Hungary, V14.Ireland, V14.Italy, V14.Jamaica, V14.Laos, V14.Mexico, V14.Nicaragua, V14.Outlying.US.Guam.USVI.etc., V14.Peru, V14.Poland, V14.Portugal, V14.Puerto.Rico, V14.Scotland, V14.Thailand, V14.Trinadad.Tobago, V14.Vietnam, V14.Yugoslavia
## Warning: model fit failed for Fold2: usekernel=FALSE, fL=0, adjust=1 Error in NaiveBayes.default(x, y, usekernel = FALSE, fL = param$fL, ...) :
## Zero variances for at least one class in variables: V2.Never.worked, V2.Without.pay, V4.11th, V4.1st.4th, V4.5th.6th, V4.Preschool, V6.Divorced, V6.Married.AF.spouse, V6.Married.civ.spouse, V6.Married.spouse.absent, V6.Never.married, V6.Separated, V6.Widowed, V7.Armed.Forces, V7.Handlers.cleaners, V7.Priv.house.serv, V8.Husband, V8.Not.in.family, V8.Other.relative, V8.Own.child, V8.Unmarried, V8.Wife, V9.Amer.Indian.Eskimo, V10.Female, V10.Male, V14.Cambodia, V14.Cuba, V14.Dominican.Republic, V14.Ecuador, V14.El.Salvador, V14.England, V14.Guatemala, V14.Haiti, V14.Holand.Netherlands, V14.Honduras, V14.Hungary, V14.Ireland, V14.Italy, V14.Jamaica, V14.Laos, V14.Nicaragua, V14.Outlying.US.Guam.USVI.etc., V14.Peru, V14.Poland, V14.Portugal, V14.Puerto.Rico, V14.Scotland, V14.Thailand, V14.Trinadad.Tobago, V14.Vietnam
## Warning: model fit failed for Fold3: usekernel=FALSE, fL=0, adjust=1 Error in NaiveBayes.default(x, y, usekernel = FALSE, fL = param$fL, ...) :
## Zero variances for at least one class in variables: V2.Never.worked, V2.Without.pay, V4.11th, V4.1st.4th, V4.5th.6th, V4.Preschool, V6.Divorced, V6.Married.AF.spouse, V6.Married.civ.spouse, V6.Married.spouse.absent, V6.Never.married, V6.Separated, V6.Widowed, V7.Armed.Forces, V7.Handlers.cleaners, V7.Priv.house.serv, V8.Husband, V8.Not.in.family, V8.Other.relative, V8.Own.child, V8.Unmarried, V8.Wife, V9.Amer.Indian.Eskimo, V9.Black, V10.Female, V10.Male, V14.Cambodia, V14.China, V14.Columbia, V14.Dominican.Republic, V14.Ecuador, V14.El.Salvador, V14.Germany, V14.Guatemala, V14.Haiti, V14.Holand.Netherlands, V14.Honduras, V14.Hong, V14.Ireland, V14.Italy, V14.Jamaica, V14.Laos, V14.Nicaragua, V14.Outlying.US.Guam.USVI.etc., V14.Peru, V14.Poland, V14.Portugal, V14.Puerto.Rico, V14.Scotland, V14.South, V14.Thailand, V14.Trinadad.Tobago, V14.Vietnam
## Warning in nominalTrainWorkflow(x = x, y = y, wts = weights, info = trainInfo,
## : There were missing values in resampled performance measures.
## Warning in train.default(x, y, weights = w, ...): missing values found in
## aggregated results
Adult_TDA_PC_5.60.5_n1_NbFit0
## Naive Bayes
##
## 6560 samples
## 108 predictor
## 2 classes: ' <=50K', ' >50K'
##
## No pre-processing
## Resampling: Cross-Validated (3 fold)
## Summary of sample sizes: 4374, 4373, 4373
## Resampling results across tuning parameters:
##
## usekernel Accuracy Kappa
## FALSE NaN NaN
## TRUE 0.8948171 0
##
## Tuning parameter 'fL' was held constant at a value of 0
## Tuning
## parameter 'adjust' was held constant at a value of 1
## Accuracy was used to select the optimal model using the largest value.
## The final values used for the model were fL = 0, usekernel = TRUE and adjust
## = 1.
Adult_TDA_PC_5.60.5_n1_NbFit0$resample
## Accuracy Kappa Resample
## 1 0.8947850 0 Fold1
## 2 0.8948331 0 Fold2
## 3 0.8948331 0 Fold3
ad_tda_pc_5.60.5_n1_nb_fit_re<-Adult_TDA_PC_5.60.5_n1_NbFit0$resample[1]
summary(Adult_TDA_PC_5.60.5_n1_NbFit0)
## Length Class Mode
## apriori 2 table numeric
## tables 108 -none- list
## levels 2 -none- character
## call 6 -none- call
## x 108 data.frame list
## usekernel 1 -none- logical
## varnames 108 -none- character
## xNames 108 -none- character
## problemType 1 -none- character
## tuneValue 3 data.frame list
## obsLevels 2 -none- character
## param 0 -none- list
# Score the held-out test split with the node-1 TDA-assisted naive Bayes
# model (pred0 is reused as scratch storage by each model section).
pred0 <- predict(Adult_TDA_PC_5.60.5_n1_NbFit0, newdata = adult.one_hot_df4Test)
# Cross-tabulate predicted against observed income class on the test split,
# then print the confusion matrix and its summary statistics.
# NOTE: in the recorded run every test row is predicted as ">50K"
# (accuracy 0.2408, Kappa 0).
ad_tda_pc_5.60.5_n1_nb_cf0 <- confusionMatrix(
  data = pred0,
  reference = as.factor(adult.one_hot_df4Test$adult_df1)
)
ad_tda_pc_5.60.5_n1_nb_cf0
## Confusion Matrix and Statistics
##
## Reference
## Prediction <=50K >50K
## <=50K 0 0
## >50K 7416 2352
##
## Accuracy : 0.2408
## 95% CI : (0.2323, 0.2494)
## No Information Rate : 0.7592
## P-Value [Acc > NIR] : 1
##
## Kappa : 0
##
## Mcnemar's Test P-Value : <2e-16
##
## Sensitivity : 0.0000
## Specificity : 1.0000
## Pos Pred Value : NaN
## Neg Pred Value : 0.2408
## Prevalence : 0.7592
## Detection Rate : 0.0000
## Detection Prevalence : 0.0000
## Balanced Accuracy : 0.5000
##
## 'Positive' Class : <=50K
##
ad_tda_pc_5.60.5_n1_nb_cf0
## Confusion Matrix and Statistics
##
## Reference
## Prediction <=50K >50K
## <=50K 0 0
## >50K 7416 2352
##
## Accuracy : 0.2408
## 95% CI : (0.2323, 0.2494)
## No Information Rate : 0.7592
## P-Value [Acc > NIR] : 1
##
## Kappa : 0
##
## Mcnemar's Test P-Value : <2e-16
##
## Sensitivity : 0.0000
## Specificity : 1.0000
## Pos Pred Value : NaN
## Neg Pred Value : 0.2408
## Prevalence : 0.7592
## Detection Rate : 0.0000
## Detection Prevalence : 0.0000
## Balanced Accuracy : 0.5000
##
## 'Positive' Class : <=50K
##
ad_tda_pc_5.60.5_n1_nb_cf0$overall
## Accuracy Kappa AccuracyLower AccuracyUpper AccuracyNull
## 0.2407862 0.0000000 0.2323343 0.2493929 0.7592138
## AccuracyPValue McnemarPValue
## 1.0000000 0.0000000
# Keep the overall test-set accuracy (first element of $overall) for the
# test-set difference computed further down.
ad_tda_pc_5.60.5_n1_nb_cf0_ov_acc <- ad_tda_pc_5.60.5_n1_nb_cf0$overall[1]
# Print the per-class metrics. The accessor must be spelled `byClass`: a
# misspelled name such as `byClas1` matches no element of the confusion-matrix
# object and silently prints NULL.
# NOTE: the recorded confusion matrix for this model has no "<=50K"
# predictions, so Pos Pred Value / Precision is NaN here.
ad_tda_pc_5.60.5_n1_nb_cf0$byClass
# Elements 5:7 of $byClass are Precision, Recall and F1, in that order.
ad_tda_pc_5.60.5_n1_nb_cf0_pre_rec_f1 <- ad_tda_pc_5.60.5_n1_nb_cf0$byClass[5:7]
###### Conduct initial Bayesian tests of non-tda-assisted NB vs. tda-assisted NB classifiers
### 3-fold diff
diff_tda_pca_5.60.5_nb_n1_3_fold<-(ad_nb_fit_re - ad_tda_pc_5.60.5_n1_nb_fit_re)
diff_tda_pca_5.60.5_nb_n1_3_fold
## Accuracy
## 1 -0.1356379
## 2 -0.1212383
## 3 -0.1329221
## Bayesian Tests 3-fold diff
# Bayesian Sign Test
bst_tda_pca_5.60.5_nb.n1_3_fold<-BayesianSignTest(as.matrix(diff_tda_pca_5.60.5_nb_n1_3_fold),-0.01,0.01)
bst_tda_pca_5.60.5_nb.n1_3_fold
## $probLeft
## [1] 0.75
##
## $probRope
## [1] 0.25
##
## $probRight
## [1] 0
# Odds Left Bayesian Sign Test: ratio of probLeft to probRight.
# NOTE: probRight is 0 in the recorded run, so this ratio evaluates to Inf
# rather than a finite odds value; handle Inf explicitly wherever this
# variable is consumed.
bst_tda_pca_5.60.5_nb.n1_3_fold_odds.left<-bst_tda_pca_5.60.5_nb.n1_3_fold$probLeft/bst_tda_pca_5.60.5_nb.n1_3_fold$probRight
bst_tda_pca_5.60.5_nb.n1_3_fold_odds.left
## [1] Inf
# Bayesian Signed Rank Test
bsr_tda_pca_5.60.5_nb.n1_3_fold<-BayesianSignedRank(as.matrix(diff_tda_pca_5.60.5_nb_n1_3_fold),-0.01,0.01)
bsr_tda_pca_5.60.5_nb.n1_3_fold
## $winLeft
## [1] 0.9909667
##
## $winRope
## [1] 0.009033333
##
## $winRight
## [1] 0
# Bayesian Correlated Test
bct_tda_pca_5.60.5_nb.n1_3_fold<-correlatedBayesianTtest(as.matrix(diff_tda_pca_5.60.5_nb_n1_3_fold),0.1,-0.01,0.01)
bct_tda_pca_5.60.5_nb.n1_3_fold
## $left
## [1] 0.9990981
##
## $rope
## [1] 0.0002389229
##
## $right
## [1] 0.0006630265
# Rope Plot
plot(rope(diff_tda_pca_5.60.5_nb_n1_3_fold,c(-0.01,0.01)))

#BayesFactor
#bf_tda_pca_5.60.5_nb.n1_3_fold = ttestBF(x = as.matrix(diff_tda_pca_5.60.5_nb_n1_3_fold))
#bf_tda_pca_5.60.5_nb.n1_3_fold
#t_test
t.test(as.matrix(diff_tda_pca_5.60.5_nb_n1_3_fold))
##
## One Sample t-test
##
## data: as.matrix(diff_tda_pca_5.60.5_nb_n1_3_fold)
## t = -29.414, df = 2, p-value = 0.001154
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
## -0.1489391 -0.1109264
## sample estimates:
## mean of x
## -0.1299327
### Test set diff
diff_tda_pca_5.60.5_nb.n1_test<-(nb_cf_ov_acc - ad_tda_pc_5.60.5_n1_nb_cf0_ov_acc)
diff_tda_pca_5.60.5_nb.n1_test
## Accuracy
## 0.5320434
## Bayesian Tests Test set diff
# Bayesian Sign Test
bst_tda_pca_5.60.5_nb.n1_test<-BayesianSignTest(as.matrix(diff_tda_pca_5.60.5_nb.n1_test),-0.01,0.01)
bst_tda_pca_5.60.5_nb.n1_test
## $probLeft
## [1] 0
##
## $probRope
## [1] 0.5
##
## $probRight
## [1] 0.5
# Odds Left Bayesian Sign Test
bst_tda_pca_5.60.5_nb.n1_test_odds.left<-bst_tda_pca_5.60.5_nb.n1_test$probLeft/bst_tda_pca_5.60.5_nb.n1_test$probRight
bst_tda_pca_5.60.5_nb.n1_test_odds.left
## [1] 0
# Bayesian Signed Rank Test
bsr_tda_pca_5.60.5_nb.n1_test<-BayesianSignedRank(as.matrix(diff_tda_pca_5.60.5_nb.n1_test),-0.01,0.01)
bsr_tda_pca_5.60.5_nb.n1_test
## $winLeft
## [1] 0
##
## $winRope
## [1] 0.1584333
##
## $winRight
## [1] 0.8415667
# Bayesian Correlated Test on the test-set accuracy difference.
# The trailing -0.01 / 0.01 arguments are the same ROPE bounds used by the
# other Bayesian tests in this section; 0.1 is presumably the correlation
# argument of correlatedBayesianTtest -- TODO confirm against its definition.
# NOTE(review): the recorded run returns NA for $left, $rope and $right here.
# The input is a single test-set difference, so presumably no variance can be
# estimated -- confirm, and do not interpret this result.
bct_tda_pca_5.60.5_nb.n1_test<-correlatedBayesianTtest(as.matrix(diff_tda_pca_5.60.5_nb.n1_test),0.1,-0.01,0.01)
bct_tda_pca_5.60.5_nb.n1_test
## $left
## [1] NA
##
## $rope
## [1] NA
##
## $right
## [1] NA
# Rope Plot
#plot(rope(diff_tda_pca_5.60.5_nb.n1_test)))
#BayesFactor
#bf_tda_pca_5.60.5_nb.n1_test = ttestBF(x = as.matrix(diff_tda_pca_5.60.5_nb.n1_test)) #bf_tda_pca_5.60.5_nb.n1_test
#t_test
#t.test(as.matrix(diff_tda_pca_5.60.5_nb.n1_test))
##Node2
# TDA-assisted naive Bayes for PCA-filter node 2: predict adult_df1 from every
# other column of tda.m_adult_5.60.5.n2.vec. Resampling is controlled by
# fitControl, defined earlier in the file.
# NOTE: in the recorded run every usekernel = FALSE fit fails with a
# "Zero variances for at least one class" error, so only the
# usekernel = TRUE candidate has resampled accuracy and is selected.
Adult_TDA_PC_5.60.5_n2_NbFit0 <- train(
  as.factor(adult_df1) ~ .,
  data = tda.m_adult_5.60.5.n2.vec,
  method = "nb",
  metric = "Accuracy",
  trControl = fitControl
)
## Warning: model fit failed for Fold1: usekernel=FALSE, fL=0, adjust=1 Error in NaiveBayes.default(x, y, usekernel = FALSE, fL = param$fL, ...) :
## Zero variances for at least one class in variables: V2.Never.worked, V2.Without.pay, V4.Preschool, V7.Armed.Forces, V7.Priv.house.serv, V14.Dominican.Republic, V14.Holand.Netherlands, V14.Honduras, V14.Hungary, V14.Outlying.US.Guam.USVI.etc., V14.Trinadad.Tobago, V14.Vietnam
## Warning: model fit failed for Fold2: usekernel=FALSE, fL=0, adjust=1 Error in NaiveBayes.default(x, y, usekernel = FALSE, fL = param$fL, ...) :
## Zero variances for at least one class in variables: V2.Never.worked, V2.Without.pay, V4.Preschool, V7.Priv.house.serv, V14.Dominican.Republic, V14.Holand.Netherlands, V14.Honduras, V14.Laos, V14.Outlying.US.Guam.USVI.etc., V14.Thailand
## Warning: model fit failed for Fold3: usekernel=FALSE, fL=0, adjust=1 Error in NaiveBayes.default(x, y, usekernel = FALSE, fL = param$fL, ...) :
## Zero variances for at least one class in variables: V2.Never.worked, V2.Without.pay, V4.Preschool, V7.Priv.house.serv, V14.Dominican.Republic, V14.Guatemala, V14.Holand.Netherlands, V14.Honduras, V14.Outlying.US.Guam.USVI.etc., V14.Scotland
## Warning in nominalTrainWorkflow(x = x, y = y, wts = weights, info = trainInfo,
## : There were missing values in resampled performance measures.
## Warning in train.default(x, y, weights = w, ...): missing values found in
## aggregated results
Adult_TDA_PC_5.60.5_n2_NbFit0
## Naive Bayes
##
## 13933 samples
## 108 predictor
## 2 classes: ' <=50K', ' >50K'
##
## No pre-processing
## Resampling: Cross-Validated (3 fold)
## Summary of sample sizes: 9288, 9289, 9289
## Resampling results across tuning parameters:
##
## usekernel Accuracy Kappa
## FALSE NaN NaN
## TRUE 0.5162561 0.04903134
##
## Tuning parameter 'fL' was held constant at a value of 0
## Tuning
## parameter 'adjust' was held constant at a value of 1
## Accuracy was used to select the optimal model using the largest value.
## The final values used for the model were fL = 0, usekernel = TRUE and adjust
## = 1.
Adult_TDA_PC_5.60.5_n2_NbFit0$resample
## Accuracy Kappa Resample
## 1 0.5194833 0.05526457 Fold1
## 2 0.5217485 0.05982785 Fold2
## 3 0.5075366 0.03200161 Fold3
ad_tda_pc_5.60.5_n2_nb_fit_re<-Adult_TDA_PC_5.60.5_n2_NbFit0$resample[1]
summary(Adult_TDA_PC_5.60.5_n2_NbFit0)
## Length Class Mode
## apriori 2 table numeric
## tables 108 -none- list
## levels 2 -none- character
## call 6 -none- call
## x 108 data.frame list
## usekernel 1 -none- logical
## varnames 108 -none- character
## xNames 108 -none- character
## problemType 1 -none- character
## tuneValue 3 data.frame list
## obsLevels 2 -none- character
## param 0 -none- list
# Score the held-out test split with the node-2 TDA-assisted naive Bayes
# model (pred0 is reused as scratch storage by each model section).
pred0 <- predict(Adult_TDA_PC_5.60.5_n2_NbFit0, newdata = adult.one_hot_df4Test)
# Cross-tabulate predicted against observed income class on the test split,
# then print the confusion matrix and its summary statistics.
ad_tda_pc_5.60.5_n2_nb_cf0 <- confusionMatrix(
  data = pred0,
  reference = as.factor(adult.one_hot_df4Test$adult_df1)
)
ad_tda_pc_5.60.5_n2_nb_cf0
## Confusion Matrix and Statistics
##
## Reference
## Prediction <=50K >50K
## <=50K 7409 2304
## >50K 7 48
##
## Accuracy : 0.7634
## 95% CI : (0.7549, 0.7718)
## No Information Rate : 0.7592
## P-Value [Acc > NIR] : 0.169
##
## Kappa : 0.0292
##
## Mcnemar's Test P-Value : <2e-16
##
## Sensitivity : 0.99906
## Specificity : 0.02041
## Pos Pred Value : 0.76279
## Neg Pred Value : 0.87273
## Prevalence : 0.75921
## Detection Rate : 0.75850
## Detection Prevalence : 0.99437
## Balanced Accuracy : 0.50973
##
## 'Positive' Class : <=50K
##
ad_tda_pc_5.60.5_n2_nb_cf0
## Confusion Matrix and Statistics
##
## Reference
## Prediction <=50K >50K
## <=50K 7409 2304
## >50K 7 48
##
## Accuracy : 0.7634
## 95% CI : (0.7549, 0.7718)
## No Information Rate : 0.7592
## P-Value [Acc > NIR] : 0.169
##
## Kappa : 0.0292
##
## Mcnemar's Test P-Value : <2e-16
##
## Sensitivity : 0.99906
## Specificity : 0.02041
## Pos Pred Value : 0.76279
## Neg Pred Value : 0.87273
## Prevalence : 0.75921
## Detection Rate : 0.75850
## Detection Prevalence : 0.99437
## Balanced Accuracy : 0.50973
##
## 'Positive' Class : <=50K
##
ad_tda_pc_5.60.5_n2_nb_cf0$overall
## Accuracy Kappa AccuracyLower AccuracyUpper AccuracyNull
## 0.76341114 0.02920106 0.75485419 0.77181076 0.75921376
## AccuracyPValue McnemarPValue
## 0.16896721 0.00000000
ad_tda_pc_5.60.5_n2_nb_cf0_ov_acc<-ad_tda_pc_5.60.5_n2_nb_cf0$overall[1]
ad_tda_pc_5.60.5_n2_nb_cf0$byClass
## Sensitivity Specificity Pos Pred Value
## 0.99905609 0.02040816 0.76279213
## Neg Pred Value Precision Recall
## 0.87272727 0.76279213 0.99905609
## F1 Prevalence Detection Rate
## 0.86508261 0.75921376 0.75849713
## Detection Prevalence Balanced Accuracy
## 0.99436937 0.50973213
ad_tda_pc_5.60.5_n2_nb_cf0_pre_rec_f1<-ad_tda_pc_5.60.5_n2_nb_cf0$byClass[5:7]
###### Conduct initial Bayesian tests of non-tda-assisted NB vs. tda-assisted NB classifiers
### 3-fold diff
diff_tda_pca_5.60.5_nb_n2_3_fold<-(ad_nb_fit_re - ad_tda_pc_5.60.5_n2_nb_fit_re)
diff_tda_pca_5.60.5_nb_n2_3_fold
## Accuracy
## 1 0.2396638
## 2 0.2518463
## 3 0.2543744
## Bayesian Tests 3-fold diff
# Bayesian Sign Test
bst_tda_pca_5.60.5_nb.n2_3_fold<-BayesianSignTest(as.matrix(diff_tda_pca_5.60.5_nb_n2_3_fold),-0.01,0.01)
bst_tda_pca_5.60.5_nb.n2_3_fold
## $probLeft
## [1] 0
##
## $probRope
## [1] 0.25
##
## $probRight
## [1] 0.75
# Odds Left Bayesian Sign Test
bst_tda_pca_5.60.5_nb.n2_3_fold_odds.left<-bst_tda_pca_5.60.5_nb.n2_3_fold$probLeft/bst_tda_pca_5.60.5_nb.n2_3_fold$probRight
bst_tda_pca_5.60.5_nb.n2_3_fold_odds.left
## [1] 0
# Bayesian Signed Rank Test
bsr_tda_pca_5.60.5_nb.n2_3_fold<-BayesianSignedRank(as.matrix(diff_tda_pca_5.60.5_nb_n2_3_fold),-0.01,0.01)
bsr_tda_pca_5.60.5_nb.n2_3_fold
## $winLeft
## [1] 0
##
## $winRope
## [1] 0.009733333
##
## $winRight
## [1] 0.9902667
# Bayesian Correlated Test
bct_tda_pca_5.60.5_nb.n2_3_fold<-correlatedBayesianTtest(as.matrix(diff_tda_pca_5.60.5_nb_n2_3_fold),0.1,-0.01,0.01)
bct_tda_pca_5.60.5_nb.n2_3_fold
## $left
## [1] 0.0002054151
##
## $rope
## [1] 3.584964e-05
##
## $right
## [1] 0.9997587
# Rope Plot
plot(rope(diff_tda_pca_5.60.5_nb_n2_3_fold,c(-0.01,0.01)))

#BayesFactor
#bf_tda_pca_5.60.5_nb.n2_3_fold = ttestBF(x = as.matrix(diff_tda_pca_5.60.5_nb_n2_3_fold))
#bf_tda_pca_5.60.5_nb.n2_3_fold
#t_test
t.test(as.matrix(diff_tda_pca_5.60.5_nb_n2_3_fold))
##
## One Sample t-test
##
## data: as.matrix(diff_tda_pca_5.60.5_nb_n2_3_fold)
## t = 54.749, df = 2, p-value = 0.0003334
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
## 0.2290890 0.2681674
## sample estimates:
## mean of x
## 0.2486282
### Test set diff
diff_tda_pca_5.60.5_nb.n2_test<-(nb_cf_ov_acc - ad_tda_pc_5.60.5_n2_nb_cf0_ov_acc)
diff_tda_pca_5.60.5_nb.n2_test
## Accuracy
## 0.009418509
## Bayesian Tests Test set diff
# Bayesian Sign Test
bst_tda_pca_5.60.5_nb.n2_test<-BayesianSignTest(as.matrix(diff_tda_pca_5.60.5_nb.n2_test),-0.01,0.01)
bst_tda_pca_5.60.5_nb.n2_test
## $probLeft
## [1] 0
##
## $probRope
## [1] 1
##
## $probRight
## [1] 0
# Odds Left Bayesian Sign Test
# Ratio of the sign test's left-region probability to its right-region
# probability. Both are 0 in the recorded sign-test output above (all mass is
# in the ROPE), so this evaluates 0/0 and the stored value is NaN.
bst_tda_pca_5.60.5_nb.n2_test_odds.left<-bst_tda_pca_5.60.5_nb.n2_test$probLeft/bst_tda_pca_5.60.5_nb.n2_test$probRight
bst_tda_pca_5.60.5_nb.n2_test_odds.left
## [1] NaN
# Bayesian Signed Rank Test
bsr_tda_pca_5.60.5_nb.n2_test<-BayesianSignedRank(as.matrix(diff_tda_pca_5.60.5_nb.n2_test),-0.01,0.01)
bsr_tda_pca_5.60.5_nb.n2_test
## $winLeft
## [1] 0
##
## $winRope
## [1] 1
##
## $winRight
## [1] 0
# Bayesian Correlated Test
# Applied to the single test-set accuracy difference with correlation 0.1 and
# ROPE [-0.01, 0.01]. The recorded result is NA for left/rope/right.
# NOTE(review): presumably because no variance can be estimated from one
# observation -- confirm against correlatedBayesianTtest's implementation.
bct_tda_pca_5.60.5_nb.n2_test<-correlatedBayesianTtest(as.matrix(diff_tda_pca_5.60.5_nb.n2_test),0.1,-0.01,0.01)
bct_tda_pca_5.60.5_nb.n2_test
## $left
## [1] NA
##
## $rope
## [1] NA
##
## $right
## [1] NA
# Rope Plot
#plot(rope(diff_tda_pca_5.60.5_nb.n2_test)))
#BayesFactor
#bf_tda_pca_5.60.5_nb.n2_test = ttestBF(x = as.matrix(diff_tda_pca_5.60.5_nb.n2_test)) #bf_tda_pca_5.60.5_nb.n2_test
#t_test
#t.test(as.matrix(diff_tda_pca_5.60.5_nb.n2_test))
##Node3
# Fit a naive Bayes classifier on the node-3 TDA training subset, resampled
# according to `fitControl` and selected on accuracy.
Adult_TDA_PC_5.60.5_n3_NbFit0 <- train(
  as.factor(adult_df1) ~ .,
  data = tda.m_adult_5.60.5.n3.vec,
  method = "nb",
  trControl = fitControl,
  metric = "Accuracy"
)
## Warning: model fit failed for Fold1: usekernel=FALSE, fL=0, adjust=1 Error in NaiveBayes.default(x, y, usekernel = FALSE, fL = param$fL, ...) :
## Zero variances for at least one class in variables: V2.Never.worked, V2.Without.pay, V4.Preschool, V7.Armed.Forces, V14.Columbia, V14.Holand.Netherlands, V14.Honduras, V14.Outlying.US.Guam.USVI.etc.
## Warning: model fit failed for Fold2: usekernel=FALSE, fL=0, adjust=1 Error in NaiveBayes.default(x, y, usekernel = FALSE, fL = param$fL, ...) :
## Zero variances for at least one class in variables: V2.Never.worked, V2.Without.pay, V4.Preschool, V7.Armed.Forces, V7.Priv.house.serv, V14.Holand.Netherlands, V14.Honduras, V14.Hungary, V14.Laos, V14.Outlying.US.Guam.USVI.etc., V14.Scotland, V14.Thailand
## Warning: model fit failed for Fold3: usekernel=FALSE, fL=0, adjust=1 Error in NaiveBayes.default(x, y, usekernel = FALSE, fL = param$fL, ...) :
## Zero variances for at least one class in variables: V2.Never.worked, V2.Without.pay, V4.Preschool, V7.Armed.Forces, V14.Ecuador, V14.Holand.Netherlands, V14.Honduras, V14.Outlying.US.Guam.USVI.etc., V14.Peru, V14.Trinadad.Tobago
## Warning in nominalTrainWorkflow(x = x, y = y, wts = weights, info = trainInfo,
## : There were missing values in resampled performance measures.
## Warning in train.default(x, y, weights = w, ...): missing values found in
## aggregated results
Adult_TDA_PC_5.60.5_n3_NbFit0
## Naive Bayes
##
## 15744 samples
## 108 predictor
## 2 classes: ' <=50K', ' >50K'
##
## No pre-processing
## Resampling: Cross-Validated (3 fold)
## Summary of sample sizes: 10496, 10497, 10495
## Resampling results across tuning parameters:
##
## usekernel Accuracy Kappa
## FALSE NaN NaN
## TRUE 0.7447282 0.003313477
##
## Tuning parameter 'fL' was held constant at a value of 0
## Tuning
## parameter 'adjust' was held constant at a value of 1
## Accuracy was used to select the optimal model using the largest value.
## The final values used for the model were fL = 0, usekernel = TRUE and adjust
## = 1.
Adult_TDA_PC_5.60.5_n3_NbFit0$resample
## Accuracy Kappa Resample
## 1 0.7458079 0.009940431 Fold1
## 2 0.7442348 0.000000000 Fold2
## 3 0.7441417 0.000000000 Fold3
ad_tda_pc_5.60.5_n3_nb_fit_re<-Adult_TDA_PC_5.60.5_n3_NbFit0$resample[1]
summary(Adult_TDA_PC_5.60.5_n3_NbFit0)
## Length Class Mode
## apriori 2 table numeric
## tables 108 -none- list
## levels 2 -none- character
## call 6 -none- call
## x 108 data.frame list
## usekernel 1 -none- logical
## varnames 108 -none- character
## xNames 108 -none- character
## problemType 1 -none- character
## tuneValue 3 data.frame list
## obsLevels 2 -none- character
## param 0 -none- list
# Score the test data with the node-3 model fitted on the training subset
pred0 <- predict(Adult_TDA_PC_5.60.5_n3_NbFit0, newdata = adult.one_hot_df4Test)
# Cross-tabulate predictions against the test labels to assess performance
ad_tda_pc_5.60.5_n3_nb_cf0 <- confusionMatrix(
  data = pred0,
  reference = as.factor(adult.one_hot_df4Test$adult_df1)
)
ad_tda_pc_5.60.5_n3_nb_cf0
## Confusion Matrix and Statistics
##
## Reference
## Prediction <=50K >50K
## <=50K 7416 2205
## >50K 0 147
##
## Accuracy : 0.7743
## 95% CI : (0.7658, 0.7825)
## No Information Rate : 0.7592
## P-Value [Acc > NIR] : 0.0002414
##
## Kappa : 0.0919
##
## Mcnemar's Test P-Value : < 2.2e-16
##
## Sensitivity : 1.0000
## Specificity : 0.0625
## Pos Pred Value : 0.7708
## Neg Pred Value : 1.0000
## Prevalence : 0.7592
## Detection Rate : 0.7592
## Detection Prevalence : 0.9850
## Balanced Accuracy : 0.5312
##
## 'Positive' Class : <=50K
##
ad_tda_pc_5.60.5_n3_nb_cf0
## Confusion Matrix and Statistics
##
## Reference
## Prediction <=50K >50K
## <=50K 7416 2205
## >50K 0 147
##
## Accuracy : 0.7743
## 95% CI : (0.7658, 0.7825)
## No Information Rate : 0.7592
## P-Value [Acc > NIR] : 0.0002414
##
## Kappa : 0.0919
##
## Mcnemar's Test P-Value : < 2.2e-16
##
## Sensitivity : 1.0000
## Specificity : 0.0625
## Pos Pred Value : 0.7708
## Neg Pred Value : 1.0000
## Prevalence : 0.7592
## Detection Rate : 0.7592
## Detection Prevalence : 0.9850
## Balanced Accuracy : 0.5312
##
## 'Positive' Class : <=50K
##
ad_tda_pc_5.60.5_n3_nb_cf0$overall
## Accuracy Kappa AccuracyLower AccuracyUpper AccuracyNull
## 0.7742628993 0.0919232485 0.7658399046 0.7825220598 0.7592137592
## AccuracyPValue McnemarPValue
## 0.0002413576 0.0000000000
ad_tda_pc_5.60.5_n3_nb_cf0_ov_acc<-ad_tda_pc_5.60.5_n3_nb_cf0$overall[1]
ad_tda_pc_5.60.5_n3_nb_cf0$byClass
## Sensitivity Specificity Pos Pred Value
## 1.0000000 0.0625000 0.7708138
## Neg Pred Value Precision Recall
## 1.0000000 0.7708138 1.0000000
## F1 Prevalence Detection Rate
## 0.8705758 0.7592138 0.7592138
## Detection Prevalence Balanced Accuracy
## 0.9849509 0.5312500
ad_tda_pc_5.60.5_n3_nb_cf0_pre_rec_f1<-ad_tda_pc_5.60.5_n3_nb_cf0$byClass[5:7]
###### Conduct initial Bayesian tests of non-tda-assisted NB vs. tda-assisted NB classifiers
### 3-fold diff
# Per-fold accuracy: non-TDA NB resamples minus node-3 TDA NB resamples
diff_tda_pca_5.60.5_nb_n3_3_fold <-
  ad_nb_fit_re - ad_tda_pc_5.60.5_n3_nb_fit_re
diff_tda_pca_5.60.5_nb_n3_3_fold
## Accuracy
## 1 0.01333922
## 2 0.02936004
## 3 0.01776929
## Bayesian Tests 3-fold diff
# Bayesian Sign Test
bst_tda_pca_5.60.5_nb.n3_3_fold<-BayesianSignTest(as.matrix(diff_tda_pca_5.60.5_nb_n3_3_fold),-0.01,0.01)
bst_tda_pca_5.60.5_nb.n3_3_fold
## $probLeft
## [1] 0
##
## $probRope
## [1] 0.25
##
## $probRight
## [1] 0.75
# Odds Left Bayesian Sign Test
bst_tda_pca_5.60.5_nb.n3_3_fold_odds.left<-bst_tda_pca_5.60.5_nb.n3_3_fold$probLeft/bst_tda_pca_5.60.5_nb.n3_3_fold$probRight
bst_tda_pca_5.60.5_nb.n3_3_fold_odds.left
## [1] 0
# Bayesian Signed Rank Test
bsr_tda_pca_5.60.5_nb.n3_3_fold<-BayesianSignedRank(as.matrix(diff_tda_pca_5.60.5_nb_n3_3_fold),-0.01,0.01)
bsr_tda_pca_5.60.5_nb.n3_3_fold
## $winLeft
## [1] 0
##
## $winRope
## [1] 0.09133333
##
## $winRight
## [1] 0.9086667
# Bayesian Correlated Test
bct_tda_pca_5.60.5_nb.n3_3_fold<-correlatedBayesianTtest(as.matrix(diff_tda_pca_5.60.5_nb_n3_3_fold),0.1,-0.01,0.01)
bct_tda_pca_5.60.5_nb.n3_3_fold
## $left
## [1] 0.0159292
##
## $rope
## [1] 0.08751828
##
## $right
## [1] 0.8965525
# Rope Plot
plot(rope(diff_tda_pca_5.60.5_nb_n3_3_fold,c(-0.01,0.01)))

#BayesFactor
#bf_tda_pca_5.60.5_nb.n3_3_fold = ttestBF(x = as.matrix(diff_tda_pca_5.60.5_nb_n3_3_fold))
#bf_tda_pca_5.60.5_nb.n3_3_fold
#t_test
t.test(as.matrix(diff_tda_pca_5.60.5_nb_n3_3_fold))
##
## One Sample t-test
##
## data: as.matrix(diff_tda_pca_5.60.5_nb_n3_3_fold)
## t = 4.22, df = 2, p-value = 0.05183
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
## -0.0003946547 0.0407070175
## sample estimates:
## mean of x
## 0.02015618
### Test set diff
diff_tda_pca_5.60.5_nb.n3_test<-(nb_cf_ov_acc - ad_tda_pc_5.60.5_n3_nb_cf0_ov_acc)
diff_tda_pca_5.60.5_nb.n3_test
## Accuracy
## -0.001433251
## Bayesian Tests Test set diff
# Bayesian Sign Test
bst_tda_pca_5.60.5_nb.n3_test<-BayesianSignTest(as.matrix(diff_tda_pca_5.60.5_nb.n3_test),-0.01,0.01)
bst_tda_pca_5.60.5_nb.n3_test
## $probLeft
## [1] 0
##
## $probRope
## [1] 1
##
## $probRight
## [1] 0
# Odds Left Bayesian Sign Test
bst_tda_pca_5.60.5_nb.n3_test_odds.left<-bst_tda_pca_5.60.5_nb.n3_test$probLeft/bst_tda_pca_5.60.5_nb.n3_test$probRight
bst_tda_pca_5.60.5_nb.n3_test_odds.left
## [1] NaN
# Bayesian Signed Rank Test
# Signed-rank test on the node-3 test-set accuracy difference with ROPE
# [-0.01, 0.01]. This previously re-ran the test on the node-2 difference and
# overwrote `bsr_tda_pca_5.60.5_nb.n2_test`, so no node-3 result was ever
# stored; the recorded output below was produced by that old node-2 call.
bsr_tda_pca_5.60.5_nb.n3_test <- BayesianSignedRank(
  as.matrix(diff_tda_pca_5.60.5_nb.n3_test), -0.01, 0.01
)
bsr_tda_pca_5.60.5_nb.n3_test
## $winLeft
## [1] 0
##
## $winRope
## [1] 1
##
## $winRight
## [1] 0
# Bayesian Correlated Test
bct_tda_pca_5.60.5_nb.n3_test<-correlatedBayesianTtest(as.matrix(diff_tda_pca_5.60.5_nb.n3_test),0.1,-0.01,0.01)
bct_tda_pca_5.60.5_nb.n3_test
## $left
## [1] NA
##
## $rope
## [1] NA
##
## $right
## [1] NA
# Rope Plot
#plot(rope(diff_tda_pca_5.60.5_nb.n3_test)))
#BayesFactor
#bf_tda_pca_5.60.5_nb.n3_test = ttestBF(x = as.matrix(diff_tda_pca_5.60.5_nb.n3_test)) #bf_tda_pca_5.60.5_nb.n3_test
#t_test
#t.test(as.matrix(diff_tda_pca_5.60.5_nb.n3_test))
##Node4
# Fit a naive Bayes classifier on the node-4 TDA training subset, resampled
# according to `fitControl` and selected on accuracy.
Adult_TDA_PC_5.60.5_n4_NbFit0 <- train(
  as.factor(adult_df1) ~ .,
  data = tda.m_adult_5.60.5.n4.vec,
  method = "nb",
  trControl = fitControl,
  metric = "Accuracy"
)
## Warning: model fit failed for Fold1: usekernel=FALSE, fL=0, adjust=1 Error in NaiveBayes.default(x, y, usekernel = FALSE, fL = param$fL, ...) :
## Zero variances for at least one class in variables: V2.Never.worked, V2.Without.pay, V4.Preschool, V7.Armed.Forces, V14.Guatemala, V14.Holand.Netherlands, V14.Honduras, V14.Iran, V14.Nicaragua, V14.Outlying.US.Guam.USVI.etc., V14.Peru, V14.Poland, V14.Portugal, V14.Thailand, V14.Trinadad.Tobago
## Warning: model fit failed for Fold2: usekernel=FALSE, fL=0, adjust=1 Error in NaiveBayes.default(x, y, usekernel = FALSE, fL = param$fL, ...) :
## Zero variances for at least one class in variables: V2.Never.worked, V2.Without.pay, V4.1st.4th, V4.Preschool, V7.Armed.Forces, V14.Cambodia, V14.Holand.Netherlands, V14.Honduras, V14.Hungary, V14.Iran, V14.Laos, V14.Outlying.US.Guam.USVI.etc., V14.Peru, V14.Scotland, V14.Trinadad.Tobago
## Warning: model fit failed for Fold3: usekernel=FALSE, fL=0, adjust=1 Error in NaiveBayes.default(x, y, usekernel = FALSE, fL = param$fL, ...) :
## Zero variances for at least one class in variables: V2.Never.worked, V2.Without.pay, V4.Preschool, V7.Armed.Forces, V7.Priv.house.serv, V14.Columbia, V14.Ecuador, V14.El.Salvador, V14.Greece, V14.Holand.Netherlands, V14.Honduras, V14.Hong, V14.Iran, V14.Ireland, V14.Outlying.US.Guam.USVI.etc., V14.Peru, V14.Trinadad.Tobago, V14.Yugoslavia
## Warning in nominalTrainWorkflow(x = x, y = y, wts = weights, info = trainInfo,
## : There were missing values in resampled performance measures.
## Warning in train.default(x, y, weights = w, ...): missing values found in
## aggregated results
Adult_TDA_PC_5.60.5_n4_NbFit0
## Naive Bayes
##
## 19829 samples
## 108 predictor
## 2 classes: ' <=50K', ' >50K'
##
## No pre-processing
## Resampling: Cross-Validated (3 fold)
## Summary of sample sizes: 13219, 13220, 13219
## Resampling results across tuning parameters:
##
## usekernel Accuracy Kappa
## FALSE NaN NaN
## TRUE 0.9351455 0
##
## Tuning parameter 'fL' was held constant at a value of 0
## Tuning
## parameter 'adjust' was held constant at a value of 1
## Accuracy was used to select the optimal model using the largest value.
## The final values used for the model were fL = 0, usekernel = TRUE and adjust
## = 1.
Adult_TDA_PC_5.60.5_n4_NbFit0$resample
## Accuracy Kappa Resample
## 1 0.9350983 0 Fold1
## 2 0.9352398 0 Fold2
## 3 0.9350983 0 Fold3
ad_tda_pc_5.60.5_n4_nb_fit_re<-Adult_TDA_PC_5.60.5_n4_NbFit0$resample[1]
summary(Adult_TDA_PC_5.60.5_n4_NbFit0)
## Length Class Mode
## apriori 2 table numeric
## tables 108 -none- list
## levels 2 -none- character
## call 6 -none- call
## x 108 data.frame list
## usekernel 1 -none- logical
## varnames 108 -none- character
## xNames 108 -none- character
## problemType 1 -none- character
## tuneValue 3 data.frame list
## obsLevels 2 -none- character
## param 0 -none- list
# Score the test data with the node-4 model fitted on the training subset
pred0 <- predict(Adult_TDA_PC_5.60.5_n4_NbFit0, newdata = adult.one_hot_df4Test)
# Cross-tabulate predictions against the test labels to assess performance
ad_tda_pc_5.60.5_n4_nb_cf0 <- confusionMatrix(
  data = pred0,
  reference = as.factor(adult.one_hot_df4Test$adult_df1)
)
ad_tda_pc_5.60.5_n4_nb_cf0
## Confusion Matrix and Statistics
##
## Reference
## Prediction <=50K >50K
## <=50K 7416 2352
## >50K 0 0
##
## Accuracy : 0.7592
## 95% CI : (0.7506, 0.7677)
## No Information Rate : 0.7592
## P-Value [Acc > NIR] : 0.5055
##
## Kappa : 0
##
## Mcnemar's Test P-Value : <2e-16
##
## Sensitivity : 1.0000
## Specificity : 0.0000
## Pos Pred Value : 0.7592
## Neg Pred Value : NaN
## Prevalence : 0.7592
## Detection Rate : 0.7592
## Detection Prevalence : 1.0000
## Balanced Accuracy : 0.5000
##
## 'Positive' Class : <=50K
##
ad_tda_pc_5.60.5_n4_nb_cf0
## Confusion Matrix and Statistics
##
## Reference
## Prediction <=50K >50K
## <=50K 7416 2352
## >50K 0 0
##
## Accuracy : 0.7592
## 95% CI : (0.7506, 0.7677)
## No Information Rate : 0.7592
## P-Value [Acc > NIR] : 0.5055
##
## Kappa : 0
##
## Mcnemar's Test P-Value : <2e-16
##
## Sensitivity : 1.0000
## Specificity : 0.0000
## Pos Pred Value : 0.7592
## Neg Pred Value : NaN
## Prevalence : 0.7592
## Detection Rate : 0.7592
## Detection Prevalence : 1.0000
## Balanced Accuracy : 0.5000
##
## 'Positive' Class : <=50K
##
ad_tda_pc_5.60.5_n4_nb_cf0$overall
## Accuracy Kappa AccuracyLower AccuracyUpper AccuracyNull
## 0.7592138 0.0000000 0.7506071 0.7676657 0.7592138
## AccuracyPValue McnemarPValue
## 0.5055358 0.0000000
ad_tda_pc_5.60.5_n4_nb_cf0_ov_acc<-ad_tda_pc_5.60.5_n4_nb_cf0$overall[1]
ad_tda_pc_5.60.5_n4_nb_cf0$byClass
## Sensitivity Specificity Pos Pred Value
## 1.0000000 0.0000000 0.7592138
## Neg Pred Value Precision Recall
## NaN 0.7592138 1.0000000
## F1 Prevalence Detection Rate
## 0.8631285 0.7592138 0.7592138
## Detection Prevalence Balanced Accuracy
## 1.0000000 0.5000000
ad_tda_pc_5.60.5_n4_nb_cf0_pre_rec_f1<-ad_tda_pc_5.60.5_n4_nb_cf0$byClass[5:7]
###### Conduct initial Bayesian tests of non-tda-assisted NB vs. tda-assisted NB classifiers
### 3-fold diff
# Per-fold accuracy: non-TDA NB resamples minus node-4 TDA NB resamples
diff_tda_pca_5.60.5_nb_n4_3_fold <-
  ad_nb_fit_re - ad_tda_pc_5.60.5_n4_nb_fit_re
diff_tda_pca_5.60.5_nb_n4_3_fold
## Accuracy
## 1 -0.1759512
## 2 -0.1616450
## 3 -0.1731873
## Bayesian Tests 3-fold diff
# Bayesian Sign Test
bst_tda_pca_5.60.5_nb.n4_3_fold<-BayesianSignTest(as.matrix(diff_tda_pca_5.60.5_nb_n4_3_fold),-0.01,0.01)
bst_tda_pca_5.60.5_nb.n4_3_fold
## $probLeft
## [1] 0.75
##
## $probRope
## [1] 0.25
##
## $probRight
## [1] 0
# Odds Left Bayesian Sign Test
# Ratio of the sign test's left-region probability to its right-region
# probability. probRight is 0 in the recorded sign-test output above, so the
# division by zero yields Inf rather than a finite odds value.
bst_tda_pca_5.60.5_nb.n4_3_fold_odds.left<-bst_tda_pca_5.60.5_nb.n4_3_fold$probLeft/bst_tda_pca_5.60.5_nb.n4_3_fold$probRight
bst_tda_pca_5.60.5_nb.n4_3_fold_odds.left
## [1] Inf
# Bayesian Signed Rank Test
bsr_tda_pca_5.60.5_nb.n4_3_fold<-BayesianSignedRank(as.matrix(diff_tda_pca_5.60.5_nb_n4_3_fold),-0.01,0.01)
bsr_tda_pca_5.60.5_nb.n4_3_fold
## $winLeft
## [1] 0.9905
##
## $winRope
## [1] 0.0095
##
## $winRight
## [1] 0
# Bayesian Correlated Test
bct_tda_pca_5.60.5_nb.n4_3_fold<-correlatedBayesianTtest(as.matrix(diff_tda_pca_5.60.5_nb_n4_3_fold),0.1,-0.01,0.01)
bct_tda_pca_5.60.5_nb.n4_3_fold
## $left
## [1] 0.9995025
##
## $rope
## [1] 0.0001041544
##
## $right
## [1] 0.0003933754
# Rope Plot
plot(rope(diff_tda_pca_5.60.5_nb_n4_3_fold,c(-0.01,0.01)))

#BayesFactor
#bf_tda_pca_5.60.5_nb.n4_3_fold = ttestBF(x = as.matrix(diff_tda_pca_5.60.5_nb_n4_3_fold))
#bf_tda_pca_5.60.5_nb.n4_3_fold
#t_test
t.test(as.matrix(diff_tda_pca_5.60.5_nb_n4_3_fold))
##
## One Sample t-test
##
## data: as.matrix(diff_tda_pca_5.60.5_nb_n4_3_fold)
## t = -38.86, df = 2, p-value = 0.0006615
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
## -0.1891126 -0.1514097
## sample estimates:
## mean of x
## -0.1702612
### Test set diff
diff_tda_pca_5.60.5_nb.n4_test<-(nb_cf_ov_acc - ad_tda_pc_5.60.5_n4_nb_cf0_ov_acc)
diff_tda_pca_5.60.5_nb.n4_test
## Accuracy
## 0.01361589
## Bayesian Tests Test set diff
# Bayesian Sign Test
bst_tda_pca_5.60.5_nb.n4_test<-BayesianSignTest(as.matrix(diff_tda_pca_5.60.5_nb.n4_test),-0.01,0.01)
bst_tda_pca_5.60.5_nb.n4_test
## $probLeft
## [1] 0
##
## $probRope
## [1] 0.5
##
## $probRight
## [1] 0.5
# Odds Left Bayesian Sign Test
bst_tda_pca_5.60.5_nb.n4_test_odds.left<-bst_tda_pca_5.60.5_nb.n4_test$probLeft/bst_tda_pca_5.60.5_nb.n4_test$probRight
bst_tda_pca_5.60.5_nb.n4_test_odds.left
## [1] 0
# Bayesian Signed Rank Test
bsr_tda_pca_5.60.5_nb.n4_test<-BayesianSignedRank(as.matrix(diff_tda_pca_5.60.5_nb.n4_test),-0.01,0.01)
bsr_tda_pca_5.60.5_nb.n4_test
## $winLeft
## [1] 0
##
## $winRope
## [1] 0.4523333
##
## $winRight
## [1] 0.5476667
# Bayesian Correlated Test
bct_tda_pca_5.60.5_nb.n4_test<-correlatedBayesianTtest(as.matrix(diff_tda_pca_5.60.5_nb.n4_test),0.1,-0.01,0.01)
bct_tda_pca_5.60.5_nb.n4_test
## $left
## [1] NA
##
## $rope
## [1] NA
##
## $right
## [1] NA
# Rope Plot
#plot(rope(diff_tda_pca_5.60.5_nb.n4_test)))
#BayesFactor
#bf_tda_pca_5.60.5_nb.n4_test = ttestBF(x = as.matrix(diff_tda_pca_5.60.5_nb.n4_test)) #bf_tda_pca_5.60.5_nb.n4_test
#t_test
#t.test(as.matrix(diff_tda_pca_5.60.5_nb.n4_test))
##Node5
# Fit a naive Bayes classifier on the node-5 TDA training subset, resampled
# according to `fitControl` and selected on accuracy.
Adult_TDA_PC_5.60.5_n5_NbFit0 <- train(
  as.factor(adult_df1) ~ .,
  data = tda.m_adult_5.60.5.n5.vec,
  method = "nb",
  trControl = fitControl,
  metric = "Accuracy"
)
## Warning: model fit failed for Fold1: usekernel=FALSE, fL=0, adjust=1 Error in NaiveBayes.default(x, y, usekernel = FALSE, fL = param$fL, ...) :
## Zero variances for at least one class in variables: V2.Never.worked, V2.Self.emp.inc, V2.Self.emp.not.inc, V2.Without.pay, V4.10th, V4.1st.4th, V4.Doctorate, V4.Preschool, V4.Prof.school, V7.Armed.Forces, V7.Farming.fishing, V7.Handlers.cleaners, V7.Transport.moving, V8.Husband, V14.Cambodia, V14.Canada, V14.China, V14.Columbia, V14.Cuba, V14.Dominican.Republic, V14.Ecuador, V14.El.Salvador, V14.England, V14.France, V14.Germany, V14.Greece, V14.Guatemala, V14.Haiti, V14.Holand.Netherlands, V14.Honduras, V14.Hong, V14.Hungary, V14.Iran, V14.Italy, V14.Mexico, V14.Nicaragua, V14.Outlying.US.Guam.USVI.etc., V14.Peru, V14.Poland, V14.Puerto.Rico, V14.Scotland, V14.South, V14.Taiwan, V14.Thailand, V14.Trinadad.Tobago, V14.Yugoslavia
## Warning: model fit failed for Fold2: usekernel=FALSE, fL=0, adjust=1 Error in NaiveBayes.default(x, y, usekernel = FALSE, fL = param$fL, ...) :
## Zero variances for at least one class in variables: V2.Never.worked, V2.Self.emp.inc, V2.Self.emp.not.inc, V2.Without.pay, V4.10th, V4.1st.4th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school, V7.Armed.Forces, V7.Farming.fishing, V7.Machine.op.inspct, V7.Transport.moving, V8.Husband, V14.Cambodia, V14.Canada, V14.China, V14.Columbia, V14.Cuba, V14.Dominican.Republic, V14.Ecuador, V14.El.Salvador, V14.France, V14.Greece, V14.Haiti, V14.Holand.Netherlands, V14.Honduras, V14.Hong, V14.Hungary, V14.India, V14.Iran, V14.Italy, V14.Laos, V14.Nicaragua, V14.Outlying.US.Guam.USVI.etc., V14.Peru, V14.Philippines, V14.Poland, V14.Portugal, V14.Puerto.Rico, V14.Scotland, V14.South, V14.Taiwan, V14.Thailand, V14.Trinadad.Tobago, V14.Yugoslavia
## Warning: model fit failed for Fold3: usekernel=FALSE, fL=0, adjust=1 Error in NaiveBayes.default(x, y, usekernel = FALSE, fL = param$fL, ...) :
## Zero variances for at least one class in variables: V2.Never.worked, V2.Self.emp.inc, V2.Self.emp.not.inc, V2.Without.pay, V4.10th, V4.1st.4th, V4.Doctorate, V4.Preschool, V4.Prof.school, V7.Armed.Forces, V7.Farming.fishing, V7.Priv.house.serv, V7.Transport.moving, V8.Husband, V14.., V14.Cambodia, V14.Canada, V14.China, V14.Columbia, V14.Cuba, V14.Dominican.Republic, V14.Ecuador, V14.El.Salvador, V14.France, V14.Greece, V14.Haiti, V14.Holand.Netherlands, V14.Honduras, V14.Hong, V14.Hungary, V14.Iran, V14.Ireland, V14.Italy, V14.Jamaica, V14.Nicaragua, V14.Outlying.US.Guam.USVI.etc., V14.Peru, V14.Poland, V14.Puerto.Rico, V14.Scotland, V14.South, V14.Taiwan, V14.Thailand, V14.Trinadad.Tobago, V14.Yugoslavia
## Warning in nominalTrainWorkflow(x = x, y = y, wts = weights, info = trainInfo,
## : There were missing values in resampled performance measures.
## Warning in train.default(x, y, weights = w, ...): missing values found in
## aggregated results
Adult_TDA_PC_5.60.5_n5_NbFit0
## Naive Bayes
##
## 16508 samples
## 108 predictor
## 2 classes: ' <=50K', ' >50K'
##
## No pre-processing
## Resampling: Cross-Validated (3 fold)
## Summary of sample sizes: 11005, 11005, 11006
## Resampling results across tuning parameters:
##
## usekernel Accuracy Kappa
## FALSE NaN NaN
## TRUE 0.992125 0
##
## Tuning parameter 'fL' was held constant at a value of 0
## Tuning
## parameter 'adjust' was held constant at a value of 1
## Accuracy was used to select the optimal model using the largest value.
## The final values used for the model were fL = 0, usekernel = TRUE and adjust
## = 1.
Adult_TDA_PC_5.60.5_n5_NbFit0$resample
## Accuracy Kappa Resample
## 1 0.9921861 0 Fold1
## 2 0.9920044 0 Fold2
## 3 0.9921847 0 Fold3
ad_tda_pc_5.60.5_n5_nb_fit_re<-Adult_TDA_PC_5.60.5_n5_NbFit0$resample[1]
summary(Adult_TDA_PC_5.60.5_n5_NbFit0)
## Length Class Mode
## apriori 2 table numeric
## tables 108 -none- list
## levels 2 -none- character
## call 6 -none- call
## x 108 data.frame list
## usekernel 1 -none- logical
## varnames 108 -none- character
## xNames 108 -none- character
## problemType 1 -none- character
## tuneValue 3 data.frame list
## obsLevels 2 -none- character
## param 0 -none- list
# Score the test data with the node-5 model fitted on the training subset
pred0 <- predict(Adult_TDA_PC_5.60.5_n5_NbFit0, newdata = adult.one_hot_df4Test)
# Cross-tabulate predictions against the test labels to assess performance
ad_tda_pc_5.60.5_n5_nb_cf0 <- confusionMatrix(
  data = pred0,
  reference = as.factor(adult.one_hot_df4Test$adult_df1)
)
ad_tda_pc_5.60.5_n5_nb_cf0
## Confusion Matrix and Statistics
##
## Reference
## Prediction <=50K >50K
## <=50K 7416 2352
## >50K 0 0
##
## Accuracy : 0.7592
## 95% CI : (0.7506, 0.7677)
## No Information Rate : 0.7592
## P-Value [Acc > NIR] : 0.5055
##
## Kappa : 0
##
## Mcnemar's Test P-Value : <2e-16
##
## Sensitivity : 1.0000
## Specificity : 0.0000
## Pos Pred Value : 0.7592
## Neg Pred Value : NaN
## Prevalence : 0.7592
## Detection Rate : 0.7592
## Detection Prevalence : 1.0000
## Balanced Accuracy : 0.5000
##
## 'Positive' Class : <=50K
##
ad_tda_pc_5.60.5_n5_nb_cf0
## Confusion Matrix and Statistics
##
## Reference
## Prediction <=50K >50K
## <=50K 7416 2352
## >50K 0 0
##
## Accuracy : 0.7592
## 95% CI : (0.7506, 0.7677)
## No Information Rate : 0.7592
## P-Value [Acc > NIR] : 0.5055
##
## Kappa : 0
##
## Mcnemar's Test P-Value : <2e-16
##
## Sensitivity : 1.0000
## Specificity : 0.0000
## Pos Pred Value : 0.7592
## Neg Pred Value : NaN
## Prevalence : 0.7592
## Detection Rate : 0.7592
## Detection Prevalence : 1.0000
## Balanced Accuracy : 0.5000
##
## 'Positive' Class : <=50K
##
ad_tda_pc_5.60.5_n5_nb_cf0$overall
## Accuracy Kappa AccuracyLower AccuracyUpper AccuracyNull
## 0.7592138 0.0000000 0.7506071 0.7676657 0.7592138
## AccuracyPValue McnemarPValue
## 0.5055358 0.0000000
ad_tda_pc_5.60.5_n5_nb_cf0_ov_acc<-ad_tda_pc_5.60.5_n5_nb_cf0$overall[1]
ad_tda_pc_5.60.5_n5_nb_cf0$byClass
## Sensitivity Specificity Pos Pred Value
## 1.0000000 0.0000000 0.7592138
## Neg Pred Value Precision Recall
## NaN 0.7592138 1.0000000
## F1 Prevalence Detection Rate
## 0.8631285 0.7592138 0.7592138
## Detection Prevalence Balanced Accuracy
## 1.0000000 0.5000000
ad_tda_pc_5.60.5_n5_nb_cf0_pre_rec_f1<-ad_tda_pc_5.60.5_n5_nb_cf0$byClass[5:7]
###### Conduct initial Bayesian tests of non-tda-assisted NB vs. tda-assisted NB classifiers
### 3-fold diff
# Per-fold accuracy: non-TDA NB resamples minus node-5 TDA NB resamples
diff_tda_pca_5.60.5_nb_n5_3_fold <-
  ad_nb_fit_re - ad_tda_pc_5.60.5_n5_nb_fit_re
diff_tda_pca_5.60.5_nb_n5_3_fold
## Accuracy
## 1 -0.2330389
## 2 -0.2184095
## 3 -0.2302736
## Bayesian Tests 3-fold diff
# Bayesian Sign Test
bst_tda_pca_5.60.5_nb.n5_3_fold<-BayesianSignTest(as.matrix(diff_tda_pca_5.60.5_nb_n5_3_fold),-0.01,0.01)
bst_tda_pca_5.60.5_nb.n5_3_fold
## $probLeft
## [1] 0.75
##
## $probRope
## [1] 0.25
##
## $probRight
## [1] 0
# Odds Left Bayesian Sign Test
bst_tda_pca_5.60.5_nb.n5_3_fold_odds.left<-bst_tda_pca_5.60.5_nb.n5_3_fold$probLeft/bst_tda_pca_5.60.5_nb.n5_3_fold$probRight
bst_tda_pca_5.60.5_nb.n5_3_fold_odds.left
## [1] Inf
# Bayesian Signed Rank Test
# Signed-rank test on the node-5 per-fold accuracy differences with ROPE
# [-0.01, 0.01]. This previously passed the node-4 differences
# (`diff_tda_pca_5.60.5_nb_n4_3_fold`), so the stored "n5" result actually
# described node 4; the recorded output below came from that old input.
bsr_tda_pca_5.60.5_nb.n5_3_fold <- BayesianSignedRank(
  as.matrix(diff_tda_pca_5.60.5_nb_n5_3_fold), -0.01, 0.01
)
bsr_tda_pca_5.60.5_nb.n5_3_fold
## $winLeft
## [1] 0.9907
##
## $winRope
## [1] 0.0093
##
## $winRight
## [1] 0
# Bayesian Correlated Test
bct_tda_pca_5.60.5_nb.n5_3_fold<-correlatedBayesianTtest(as.matrix(diff_tda_pca_5.60.5_nb_n5_3_fold),0.1,-0.01,0.01)
bct_tda_pca_5.60.5_nb.n5_3_fold
## $left
## [1] 0.9997158
##
## $rope
## [1] 4.586246e-05
##
## $right
## [1] 0.0002383219
# Rope Plot
plot(rope(diff_tda_pca_5.60.5_nb_n5_3_fold,c(-0.01,0.01)))

#BayesFactor
#bf_tda_pca_5.60.5_nb.n5_3_fold = ttestBF(x = as.matrix(diff_tda_pca_5.60.5_nb_n5_3_fold))
#bf_tda_pca_5.60.5_nb.n5_3_fold
#t_test
t.test(as.matrix(diff_tda_pca_5.60.5_nb_n5_3_fold))
##
## One Sample t-test
##
## data: as.matrix(diff_tda_pca_5.60.5_nb_n5_3_fold)
## t = -50.642, df = 2, p-value = 0.0003897
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
## -0.2465474 -0.2079340
## sample estimates:
## mean of x
## -0.2272407
### Test set diff
diff_tda_pca_5.60.5_nb.n5_test<-(nb_cf_ov_acc - ad_tda_pc_5.60.5_n5_nb_cf0_ov_acc)
diff_tda_pca_5.60.5_nb.n5_test
## Accuracy
## 0.01361589
## Bayesian Tests Test set diff
# Bayesian Sign Test
bst_tda_pca_5.60.5_nb.n5_test<-BayesianSignTest(as.matrix(diff_tda_pca_5.60.5_nb.n5_test),-0.01,0.01)
bst_tda_pca_5.60.5_nb.n5_test
## $probLeft
## [1] 0
##
## $probRope
## [1] 0.5
##
## $probRight
## [1] 0.5
# Odds Left Bayesian Sign Test
bst_tda_pca_5.60.5_nb.n5_test_odds.left<-bst_tda_pca_5.60.5_nb.n5_test$probLeft/bst_tda_pca_5.60.5_nb.n5_test$probRight
bst_tda_pca_5.60.5_nb.n5_test_odds.left
## [1] 0
# Bayesian Signed Rank Test
bsr_tda_pca_5.60.5_nb.n5_test<-BayesianSignedRank(as.matrix(diff_tda_pca_5.60.5_nb.n5_test),-0.01,0.01)
bsr_tda_pca_5.60.5_nb.n5_test
## $winLeft
## [1] 0
##
## $winRope
## [1] 0.457
##
## $winRight
## [1] 0.543
# Bayesian Correlated Test
bct_tda_pca_5.60.5_nb.n5_test<-correlatedBayesianTtest(as.matrix(diff_tda_pca_5.60.5_nb.n5_test),0.1,-0.01,0.01)
bct_tda_pca_5.60.5_nb.n5_test
## $left
## [1] NA
##
## $rope
## [1] NA
##
## $right
## [1] NA
# Rope Plot
#plot(rope(diff_tda_pca_5.60.5_nb.n5_test)))
#BayesFactor
#bf_tda_pca_5.60.5_nb.n5_test = ttestBF(x = as.matrix(diff_tda_pca_5.60.5_nb.n5_test)) #bf_tda_pca_5.60.5_nb.n5_test
#t_test
#t.test(as.matrix(diff_tda_pca_5.60.5_nb.n5_test))
##With TDA KDE filter 5 intervals, 50% overlap, 5 bins
##Node1
# Fit a naive Bayes classifier on the node-1 TDA (KDE filter) training subset,
# resampled according to `fitControl` and selected on accuracy.
Adult_TDA_KDE_5.60.5_n1_NbFit0 <- train(
  as.factor(adult_df1) ~ .,
  data = tda.m_kde_adult_5.60.5.n1.vec,
  method = "nb",
  trControl = fitControl,
  metric = "Accuracy"
)
## Warning: model fit failed for Fold1: usekernel=FALSE, fL=0, adjust=1 Error in NaiveBayes.default(x, y, usekernel = FALSE, fL = param$fL, ...) :
## Zero variances for at least one class in variables: V2.Never.worked, V2.Without.pay, V4.Preschool, V7.Armed.Forces, V7.Priv.house.serv, V14.Dominican.Republic, V14.Haiti, V14.Holand.Netherlands, V14.Laos, V14.Outlying.US.Guam.USVI.etc.
## Warning: model fit failed for Fold2: usekernel=FALSE, fL=0, adjust=1 Error in NaiveBayes.default(x, y, usekernel = FALSE, fL = param$fL, ...) :
## Zero variances for at least one class in variables: V2.Never.worked, V2.Without.pay, V4.Preschool, V14.Dominican.Republic, V14.Holand.Netherlands, V14.Ireland, V14.Outlying.US.Guam.USVI.etc., V14.Scotland
## Warning: model fit failed for Fold3: usekernel=FALSE, fL=0, adjust=1 Error in NaiveBayes.default(x, y, usekernel = FALSE, fL = param$fL, ...) :
## Zero variances for at least one class in variables: V2.Never.worked, V2.Without.pay, V4.Preschool, V7.Armed.Forces, V14.Columbia, V14.Dominican.Republic, V14.Ecuador, V14.Holand.Netherlands, V14.Honduras, V14.Outlying.US.Guam.USVI.etc., V14.Peru, V14.Portugal, V14.Thailand, V14.Trinadad.Tobago
## Warning in nominalTrainWorkflow(x = x, y = y, wts = weights, info = trainInfo,
## : There were missing values in resampled performance measures.
## Warning in train.default(x, y, weights = w, ...): missing values found in
## aggregated results
Adult_TDA_KDE_5.60.5_n1_NbFit0
## Naive Bayes
##
## 15260 samples
## 108 predictor
## 2 classes: ' <=50K', ' >50K'
##
## No pre-processing
## Resampling: Cross-Validated (3 fold)
## Summary of sample sizes: 10174, 10172, 10174
## Resampling results across tuning parameters:
##
## usekernel Accuracy Kappa
## FALSE NaN NaN
## TRUE 0.7506543 0.05894399
##
## Tuning parameter 'fL' was held constant at a value of 0
## Tuning
## parameter 'adjust' was held constant at a value of 1
## Accuracy was used to select the optimal model using the largest value.
## The final values used for the model were fL = 0, usekernel = TRUE and adjust
## = 1.
Adult_TDA_KDE_5.60.5_n1_NbFit0$resample
## Accuracy Kappa Resample
## 1 0.7445930 0.02553800 Fold1
## 2 0.7582547 0.10063763 Fold2
## 3 0.7491152 0.05065635 Fold3
ad_tda_kde_5.60.5_n1_nb_fit_re<-Adult_TDA_KDE_5.60.5_n1_NbFit0$resample[1]
summary(Adult_TDA_KDE_5.60.5_n1_NbFit0)
## Length Class Mode
## apriori 2 table numeric
## tables 108 -none- list
## levels 2 -none- character
## call 6 -none- call
## x 108 data.frame list
## usekernel 1 -none- logical
## varnames 108 -none- character
## xNames 108 -none- character
## problemType 1 -none- character
## tuneValue 3 data.frame list
## obsLevels 2 -none- character
## param 0 -none- list
# Score the test data with the KDE node-1 model fitted on the training subset
pred0 <- predict(Adult_TDA_KDE_5.60.5_n1_NbFit0, newdata = adult.one_hot_df4Test)
# Cross-tabulate predictions against the test labels to assess performance
ad_tda_kde_5.60.5_n1_nb_cf0 <- confusionMatrix(
  data = pred0,
  reference = as.factor(adult.one_hot_df4Test$adult_df1)
)
ad_tda_kde_5.60.5_n1_nb_cf0
## Confusion Matrix and Statistics
##
## Reference
## Prediction <=50K >50K
## <=50K 7416 2314
## >50K 0 38
##
## Accuracy : 0.7631
## 95% CI : (0.7545, 0.7715)
## No Information Rate : 0.7592
## P-Value [Acc > NIR] : 0.1875
##
## Kappa : 0.0243
##
## Mcnemar's Test P-Value : <2e-16
##
## Sensitivity : 1.00000
## Specificity : 0.01616
## Pos Pred Value : 0.76218
## Neg Pred Value : 1.00000
## Prevalence : 0.75921
## Detection Rate : 0.75921
## Detection Prevalence : 0.99611
## Balanced Accuracy : 0.50808
##
## 'Positive' Class : <=50K
##
ad_tda_kde_5.60.5_n1_nb_cf0
## Confusion Matrix and Statistics
##
## Reference
## Prediction <=50K >50K
## <=50K 7416 2314
## >50K 0 38
##
## Accuracy : 0.7631
## 95% CI : (0.7545, 0.7715)
## No Information Rate : 0.7592
## P-Value [Acc > NIR] : 0.1875
##
## Kappa : 0.0243
##
## Mcnemar's Test P-Value : <2e-16
##
## Sensitivity : 1.00000
## Specificity : 0.01616
## Pos Pred Value : 0.76218
## Neg Pred Value : 1.00000
## Prevalence : 0.75921
## Detection Rate : 0.75921
## Detection Prevalence : 0.99611
## Balanced Accuracy : 0.50808
##
## 'Positive' Class : <=50K
##
ad_tda_kde_5.60.5_n1_nb_cf0$overall
## Accuracy Kappa AccuracyLower AccuracyUpper AccuracyNull
## 0.76310401 0.02432864 0.75454338 0.77150750 0.75921376
## AccuracyPValue McnemarPValue
## 0.18754208 0.00000000
ad_tda_kde_5.60.5_n1_nb_cf0_ov_acc<-ad_tda_kde_5.60.5_n1_nb_cf0$overall[1]
# Print the per-class metrics of the KDE node-1 confusion matrix. The element
# name was misspelled `byClas1`, which matches nothing and returned NULL (the
# recorded `## NULL` output below predates this fix).
ad_tda_kde_5.60.5_n1_nb_cf0$byClass
## NULL
ad_tda_kde_5.60.5_n1_nb_cf0_pre_rec_f1<-ad_tda_kde_5.60.5_n1_nb_cf0$byClass[5:7]
###### Conduct initial Bayesian tests of non-tda-assisted NB vs. tda-assisted NB classifiers
### 3-fold diff
# Per-fold accuracy: non-TDA NB resamples minus KDE node-1 TDA NB resamples
diff_tda_kde_5.60.5_nb_n1_3_fold <-
  ad_nb_fit_re - ad_tda_kde_5.60.5_n1_nb_fit_re
diff_tda_kde_5.60.5_nb_n1_3_fold
## Accuracy
## 1 0.01455414
## 2 0.01534012
## 3 0.01279581
## Bayesian Tests 3-fold diff
# Bayesian Sign Test
bst_tda_kde_5.60.5_nb.n1_3_fold<-BayesianSignTest(as.matrix(diff_tda_kde_5.60.5_nb_n1_3_fold),-0.01,0.01)
bst_tda_kde_5.60.5_nb.n1_3_fold
## $probLeft
## [1] 0
##
## $probRope
## [1] 0.25
##
## $probRight
## [1] 0.75
# Odds Left Bayesian Sign Test
bst_tda_kde_5.60.5_nb.n1_3_fold_odds.left<-bst_tda_kde_5.60.5_nb.n1_3_fold$probLeft/bst_tda_kde_5.60.5_nb.n1_3_fold$probRight
bst_tda_kde_5.60.5_nb.n1_3_fold_odds.left
## [1] 0
# Bayesian Signed Rank Test
bsr_tda_kde_5.60.5_nb.n1_3_fold<-BayesianSignedRank(as.matrix(diff_tda_kde_5.60.5_nb_n1_3_fold),-0.01,0.01)
bsr_tda_kde_5.60.5_nb.n1_3_fold
## $winLeft
## [1] 0
##
## $winRope
## [1] 0.1689667
##
## $winRight
## [1] 0.8310333
# Bayesian Correlated Test
bct_tda_kde_5.60.5_nb.n1_3_fold<-correlatedBayesianTtest(as.matrix(diff_tda_kde_5.60.5_nb_n1_3_fold),0.1,-0.01,0.01)
bct_tda_kde_5.60.5_nb.n1_3_fold
## $left
## [1] 0.000641164
##
## $rope
## [1] 0.0191911
##
## $right
## [1] 0.9801677
# Rope Plot
plot(rope(diff_tda_kde_5.60.5_nb_n1_3_fold,c(-0.01,0.01)))

#BayesFactor
#bf_tda_kde_5.60.5_nb.n1_3_fold = ttestBF(x = as.matrix(diff_tda_kde_5.60.5_nb_n1_3_fold))
#bf_tda_kde_5.60.5_nb.n1_3_fold
#t_test
t.test(as.matrix(diff_tda_kde_5.60.5_nb_n1_3_fold))
##
## One Sample t-test
##
## data: as.matrix(diff_tda_kde_5.60.5_nb_n1_3_fold)
## t = 18.919, df = 2, p-value = 0.002782
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
## 0.01099380 0.01746625
## sample estimates:
## mean of x
## 0.01423003
### Test set diff
# Held-out accuracy gap: nb_cf_ov_acc minus the node-1 test accuracy.
# NOTE(review): only one difference is available here; the correlated test
# below prints NA for all three probabilities.
diff_tda_kde_5.60.5_nb.n1_test <-
  nb_cf_ov_acc - ad_tda_kde_5.60.5_n1_nb_cf0_ov_acc
diff_tda_kde_5.60.5_nb.n1_test
## Accuracy
## 0.009725635
## Bayesian Tests Test set diff
# Bayesian Sign Test
bst_tda_kde_5.60.5_nb.n1_test <- BayesianSignTest(
  as.matrix(diff_tda_kde_5.60.5_nb.n1_test), -0.01, 0.01
)
bst_tda_kde_5.60.5_nb.n1_test
## $probLeft
## [1] 0
##
## $probRope
## [1] 1
##
## $probRight
## [1] 0
# Odds Left Bayesian Sign Test: probLeft / probRight (0 / 0 prints NaN)
bst_tda_kde_5.60.5_nb.n1_test_odds.left <-
  bst_tda_kde_5.60.5_nb.n1_test[["probLeft"]] /
  bst_tda_kde_5.60.5_nb.n1_test[["probRight"]]
bst_tda_kde_5.60.5_nb.n1_test_odds.left
## [1] NaN
# Bayesian Signed Rank Test
bsr_tda_kde_5.60.5_nb.n1_test <- BayesianSignedRank(
  as.matrix(diff_tda_kde_5.60.5_nb.n1_test), -0.01, 0.01
)
bsr_tda_kde_5.60.5_nb.n1_test
## $winLeft
## [1] 0
##
## $winRope
## [1] 1
##
## $winRight
## [1] 0
# Bayesian Correlated Test
bct_tda_kde_5.60.5_nb.n1_test <- correlatedBayesianTtest(
  as.matrix(diff_tda_kde_5.60.5_nb.n1_test), 0.1, -0.01, 0.01
)
bct_tda_kde_5.60.5_nb.n1_test
## $left
## [1] NA
##
## $rope
## [1] NA
##
## $right
## [1] NA
# Rope Plot
#plot(rope(diff_tda_kde_5.60.5_nb.n1_test)))
#BayesFactor
#bf_tda_kde_5.60.5_nb.n1_test = ttestBF(x = as.matrix(diff_tda_kde_5.60.5_nb.n1_test)) #bf_tda_kde_5.60.5_nb.n1_test
#t_test
#t.test(as.matrix(diff_tda_kde_5.60.5_nb.n1_test))
##Node2
# Fit naive Bayes on the node-2 data, resampled as configured in fitControl and
# tuned on accuracy. The warnings printed after this call show the
# usekernel = FALSE candidate failing in every fold (zero-variance columns),
# so only the usekernel = TRUE candidate gets a resampled score.
Adult_TDA_KDE_5.60.5_n2_NbFit0 <- train(
  as.factor(adult_df1) ~ .,
  data = tda.m_adult_5.60.5.n2.vec,
  method = "nb",
  trControl = fitControl,
  metric = "Accuracy"
)
## Warning: model fit failed for Fold1: usekernel=FALSE, fL=0, adjust=1 Error in NaiveBayes.default(x, y, usekernel = FALSE, fL = param$fL, ...) :
## Zero variances for at least one class in variables: V2.Never.worked, V2.Without.pay, V4.Preschool, V7.Armed.Forces, V7.Priv.house.serv, V14.Dominican.Republic, V14.Guatemala, V14.Holand.Netherlands, V14.Honduras, V14.Outlying.US.Guam.USVI.etc.
## Warning: model fit failed for Fold2: usekernel=FALSE, fL=0, adjust=1 Error in NaiveBayes.default(x, y, usekernel = FALSE, fL = param$fL, ...) :
## Zero variances for at least one class in variables: V2.Never.worked, V2.Without.pay, V4.Preschool, V7.Armed.Forces, V7.Priv.house.serv, V14.Dominican.Republic, V14.Holand.Netherlands, V14.Honduras, V14.Hungary, V14.Ireland, V14.Laos, V14.Outlying.US.Guam.USVI.etc., V14.Scotland, V14.Trinadad.Tobago, V14.Vietnam
## Warning: model fit failed for Fold3: usekernel=FALSE, fL=0, adjust=1 Error in NaiveBayes.default(x, y, usekernel = FALSE, fL = param$fL, ...) :
## Zero variances for at least one class in variables: V2.Never.worked, V2.Without.pay, V4.Preschool, V7.Priv.house.serv, V14.Columbia, V14.Dominican.Republic, V14.Holand.Netherlands, V14.Honduras, V14.Outlying.US.Guam.USVI.etc., V14.Peru, V14.Trinadad.Tobago
## Warning in nominalTrainWorkflow(x = x, y = y, wts = weights, info = trainInfo,
## : There were missing values in resampled performance measures.
## Warning in train.default(x, y, weights = w, ...): missing values found in
## aggregated results
Adult_TDA_KDE_5.60.5_n2_NbFit0
## Naive Bayes
##
## 13933 samples
## 108 predictor
## 2 classes: ' <=50K', ' >50K'
##
## No pre-processing
## Resampling: Cross-Validated (3 fold)
## Summary of sample sizes: 9288, 9289, 9289
## Resampling results across tuning parameters:
##
## usekernel Accuracy Kappa
## FALSE NaN NaN
## TRUE 0.5231467 0.06233996
##
## Tuning parameter 'fL' was held constant at a value of 0
## Tuning
## parameter 'adjust' was held constant at a value of 1
## Accuracy was used to select the optimal model using the largest value.
## The final values used for the model were fL = 0, usekernel = TRUE and adjust
## = 1.
# Per-fold performance of the selected model.
Adult_TDA_KDE_5.60.5_n2_NbFit0$resample
## Accuracy Kappa Resample
## 1 0.5207750 0.05776039 Fold1
## 2 0.5269165 0.06981230 Fold2
## 3 0.5217485 0.05944720 Fold3
# Keep only the Accuracy column (the first column, as printed above) as a
# one-column data frame for the fold-wise comparison later on.
ad_tda_kde_5.60.5_n2_nb_fit_re <-
  Adult_TDA_KDE_5.60.5_n2_NbFit0$resample["Accuracy"]
summary(Adult_TDA_KDE_5.60.5_n2_NbFit0)
## Length Class Mode
## apriori 2 table numeric
## tables 108 -none- list
## levels 2 -none- character
## call 6 -none- call
## x 108 data.frame list
## usekernel 1 -none- logical
## varnames 108 -none- character
## xNames 108 -none- character
## problemType 1 -none- character
## tuneValue 3 data.frame list
## obsLevels 2 -none- character
## param 0 -none- list
# Score the test data with the node-2 NB model. `pred0` is overwritten by each
# node's prediction step.
pred0 <- predict(
  Adult_TDA_KDE_5.60.5_n2_NbFit0,
  newdata = adult.one_hot_df4Test
)
# Cross-tabulate predictions against the test labels to assess performance.
ad_tda_kde_5.60.5_n2_nb_cf0 <- confusionMatrix(
  data = pred0,
  reference = as.factor(adult.one_hot_df4Test$adult_df1)
)
ad_tda_kde_5.60.5_n2_nb_cf0
## Confusion Matrix and Statistics
##
## Reference
## Prediction <=50K >50K
## <=50K 7409 2304
## >50K 7 48
##
## Accuracy : 0.7634
## 95% CI : (0.7549, 0.7718)
## No Information Rate : 0.7592
## P-Value [Acc > NIR] : 0.169
##
## Kappa : 0.0292
##
## Mcnemar's Test P-Value : <2e-16
##
## Sensitivity : 0.99906
## Specificity : 0.02041
## Pos Pred Value : 0.76279
## Neg Pred Value : 0.87273
## Prevalence : 0.75921
## Detection Rate : 0.75850
## Detection Prevalence : 0.99437
## Balanced Accuracy : 0.50973
##
## 'Positive' Class : <=50K
##
ad_tda_kde_5.60.5_n2_nb_cf0
## Confusion Matrix and Statistics
##
## Reference
## Prediction <=50K >50K
## <=50K 7409 2304
## >50K 7 48
##
## Accuracy : 0.7634
## 95% CI : (0.7549, 0.7718)
## No Information Rate : 0.7592
## P-Value [Acc > NIR] : 0.169
##
## Kappa : 0.0292
##
## Mcnemar's Test P-Value : <2e-16
##
## Sensitivity : 0.99906
## Specificity : 0.02041
## Pos Pred Value : 0.76279
## Neg Pred Value : 0.87273
## Prevalence : 0.75921
## Detection Rate : 0.75850
## Detection Prevalence : 0.99437
## Balanced Accuracy : 0.50973
##
## 'Positive' Class : <=50K
##
# Overall statistics of the node-2 confusion matrix.
ad_tda_kde_5.60.5_n2_nb_cf0$overall
## Accuracy Kappa AccuracyLower AccuracyUpper AccuracyNull
## 0.76341114 0.02920106 0.75485419 0.77181076 0.75921376
## AccuracyPValue McnemarPValue
## 0.16896721 0.00000000
# Accuracy is the first element of $overall; select it by name.
ad_tda_kde_5.60.5_n2_nb_cf0_ov_acc <-
  ad_tda_kde_5.60.5_n2_nb_cf0$overall["Accuracy"]
# Per-class statistics.
ad_tda_kde_5.60.5_n2_nb_cf0$byClass
## Sensitivity Specificity Pos Pred Value
## 0.99905609 0.02040816 0.76279213
## Neg Pred Value Precision Recall
## 0.87272727 0.76279213 0.99905609
## F1 Prevalence Detection Rate
## 0.86508261 0.75921376 0.75849713
## Detection Prevalence Balanced Accuracy
## 0.99436937 0.50973213
# Precision, Recall and F1 sit at positions 5:7 of byClass; select by name.
ad_tda_kde_5.60.5_n2_nb_cf0_pre_rec_f1 <-
  ad_tda_kde_5.60.5_n2_nb_cf0$byClass[c("Precision", "Recall", "F1")]
###### Conduct initial Bayesian tests of non-tda-assisted NB vs. tda-assisted NB classifiers
### 3-fold diff
# Per-fold accuracy gap: ad_nb_fit_re minus the node-2 fold accuracies, so a
# positive entry means ad_nb_fit_re scored higher in that fold.
# NOTE(review): ad_nb_fit_re is defined outside this section -- presumably the
# non-TDA NB resample accuracies; confirm.
diff_tda_kde_5.60.5_nb_n2_3_fold <-
  ad_nb_fit_re - ad_tda_kde_5.60.5_n2_nb_fit_re
diff_tda_kde_5.60.5_nb_n2_3_fold
## Accuracy
## 1 0.2383721
## 2 0.2466784
## 3 0.2401625
## Bayesian Tests 3-fold diff
# Bayesian Sign Test (-0.01 / 0.01 are presumably the ROPE bounds)
bst_tda_kde_5.60.5_nb.n2_3_fold <- BayesianSignTest(
  as.matrix(diff_tda_kde_5.60.5_nb_n2_3_fold), -0.01, 0.01
)
bst_tda_kde_5.60.5_nb.n2_3_fold
## $probLeft
## [1] 0
##
## $probRope
## [1] 0.25
##
## $probRight
## [1] 0.75
# Odds Left Bayesian Sign Test: probLeft / probRight
bst_tda_kde_5.60.5_nb.n2_3_fold_odds.left <-
  bst_tda_kde_5.60.5_nb.n2_3_fold[["probLeft"]] /
  bst_tda_kde_5.60.5_nb.n2_3_fold[["probRight"]]
bst_tda_kde_5.60.5_nb.n2_3_fold_odds.left
## [1] 0
# Bayesian Signed Rank Test
bsr_tda_kde_5.60.5_nb.n2_3_fold <- BayesianSignedRank(
  as.matrix(diff_tda_kde_5.60.5_nb_n2_3_fold), -0.01, 0.01
)
bsr_tda_kde_5.60.5_nb.n2_3_fold
## $winLeft
## [1] 0
##
## $winRope
## [1] 0.0102
##
## $winRight
## [1] 0.9898
# Bayesian Correlated Test
# NOTE(review): 0.1 is the second positional argument -- presumably the
# correlation parameter; confirm against correlatedBayesianTtest's definition.
bct_tda_kde_5.60.5_nb.n2_3_fold <- correlatedBayesianTtest(
  as.matrix(diff_tda_kde_5.60.5_nb_n2_3_fold), 0.1, -0.01, 0.01
)
bct_tda_kde_5.60.5_nb.n2_3_fold
## $left
## [1] 6.699611e-05
##
## $rope
## [1] 1.206029e-05
##
## $right
## [1] 0.9999209
# Rope Plot
plot(rope(diff_tda_kde_5.60.5_nb_n2_3_fold, c(-0.01, 0.01)))

#BayesFactor
#bf_tda_kde_5.60.5_nb.n2_3_fold = ttestBF(x = as.matrix(diff_tda_kde_5.60.5_nb_n2_3_fold))
#bf_tda_kde_5.60.5_nb.n2_3_fold
#t_test
# One-sample t-test of the three fold differences against a mean of zero.
t.test(as.matrix(diff_tda_kde_5.60.5_nb_n2_3_fold))
##
## One Sample t-test
##
## data: as.matrix(diff_tda_kde_5.60.5_nb_n2_3_fold)
## t = 95.782, df = 2, p-value = 0.000109
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
## 0.2308785 0.2525969
## sample estimates:
## mean of x
## 0.2417377
### Test set diff
# Held-out accuracy gap: nb_cf_ov_acc minus the node-2 test accuracy.
# NOTE(review): only one difference is available here; the correlated test
# below prints NA for all three probabilities.
diff_tda_kde_5.60.5_nb.n2_test <-
  nb_cf_ov_acc - ad_tda_kde_5.60.5_n2_nb_cf0_ov_acc
diff_tda_kde_5.60.5_nb.n2_test
## Accuracy
## 0.009418509
## Bayesian Tests Test set diff
# Bayesian Sign Test
bst_tda_kde_5.60.5_nb.n2_test <- BayesianSignTest(
  as.matrix(diff_tda_kde_5.60.5_nb.n2_test), -0.01, 0.01
)
bst_tda_kde_5.60.5_nb.n2_test
## $probLeft
## [1] 0
##
## $probRope
## [1] 1
##
## $probRight
## [1] 0
# Odds Left Bayesian Sign Test: probLeft / probRight (0 / 0 prints NaN)
bst_tda_kde_5.60.5_nb.n2_test_odds.left <-
  bst_tda_kde_5.60.5_nb.n2_test[["probLeft"]] /
  bst_tda_kde_5.60.5_nb.n2_test[["probRight"]]
bst_tda_kde_5.60.5_nb.n2_test_odds.left
## [1] NaN
# Bayesian Signed Rank Test
bsr_tda_kde_5.60.5_nb.n2_test <- BayesianSignedRank(
  as.matrix(diff_tda_kde_5.60.5_nb.n2_test), -0.01, 0.01
)
bsr_tda_kde_5.60.5_nb.n2_test
## $winLeft
## [1] 0
##
## $winRope
## [1] 1
##
## $winRight
## [1] 0
# Bayesian Correlated Test
bct_tda_kde_5.60.5_nb.n2_test <- correlatedBayesianTtest(
  as.matrix(diff_tda_kde_5.60.5_nb.n2_test), 0.1, -0.01, 0.01
)
bct_tda_kde_5.60.5_nb.n2_test
## $left
## [1] NA
##
## $rope
## [1] NA
##
## $right
## [1] NA
# Rope Plot
#plot(rope(diff_tda_kde_5.60.5_nb.n2_test)))
#BayesFactor
#bf_tda_kde_5.60.5_nb.n2_test = ttestBF(x = as.matrix(diff_tda_kde_5.60.5_nb.n2_test)) #bf_tda_kde_5.60.5_nb.n2_test
#t_test
#t.test(as.matrix(diff_tda_kde_5.60.5_nb.n2_test))
##Node3
# Fit naive Bayes on the node-3 data, resampled as configured in fitControl and
# tuned on accuracy. The warnings printed after this call show the
# usekernel = FALSE candidate failing in every fold (zero-variance columns),
# so only the usekernel = TRUE candidate gets a resampled score.
Adult_TDA_KDE_5.60.5_n3_NbFit0 <- train(
  as.factor(adult_df1) ~ .,
  data = tda.m_adult_5.60.5.n3.vec,
  method = "nb",
  trControl = fitControl,
  metric = "Accuracy"
)
## Warning: model fit failed for Fold1: usekernel=FALSE, fL=0, adjust=1 Error in NaiveBayes.default(x, y, usekernel = FALSE, fL = param$fL, ...) :
## Zero variances for at least one class in variables: V2.Never.worked, V2.Without.pay, V4.Preschool, V7.Armed.Forces, V14.Holand.Netherlands, V14.Honduras, V14.Hungary, V14.Nicaragua, V14.Outlying.US.Guam.USVI.etc., V14.Portugal, V14.Yugoslavia
## Warning: model fit failed for Fold2: usekernel=FALSE, fL=0, adjust=1 Error in NaiveBayes.default(x, y, usekernel = FALSE, fL = param$fL, ...) :
## Zero variances for at least one class in variables: V2.Never.worked, V2.Without.pay, V4.Preschool, V7.Armed.Forces, V14.Columbia, V14.El.Salvador, V14.Holand.Netherlands, V14.Honduras, V14.Laos, V14.Outlying.US.Guam.USVI.etc., V14.Peru
## Warning: model fit failed for Fold3: usekernel=FALSE, fL=0, adjust=1 Error in NaiveBayes.default(x, y, usekernel = FALSE, fL = param$fL, ...) :
## Zero variances for at least one class in variables: V2.Never.worked, V2.Without.pay, V4.Preschool, V7.Armed.Forces, V7.Priv.house.serv, V14.Ecuador, V14.Guatemala, V14.Holand.Netherlands, V14.Honduras, V14.Outlying.US.Guam.USVI.etc., V14.Thailand, V14.Vietnam
## Warning in nominalTrainWorkflow(x = x, y = y, wts = weights, info = trainInfo,
## : There were missing values in resampled performance measures.
## Warning in train.default(x, y, weights = w, ...): missing values found in
## aggregated results
Adult_TDA_KDE_5.60.5_n3_NbFit0
## Naive Bayes
##
## 15744 samples
## 108 predictor
## 2 classes: ' <=50K', ' >50K'
##
## No pre-processing
## Resampling: Cross-Validated (3 fold)
## Summary of sample sizes: 10497, 10495, 10496
## Resampling results across tuning parameters:
##
## usekernel Accuracy Kappa
## FALSE NaN NaN
## TRUE 0.7444106 0.001476707
##
## Tuning parameter 'fL' was held constant at a value of 0
## Tuning
## parameter 'adjust' was held constant at a value of 1
## Accuracy was used to select the optimal model using the largest value.
## The final values used for the model were fL = 0, usekernel = TRUE and adjust
## = 1.
# Per-fold performance of the selected model.
Adult_TDA_KDE_5.60.5_n3_NbFit0$resample
## Accuracy Kappa Resample
## 1 0.7449971 0.004430121 Fold1
## 2 0.7441417 0.000000000 Fold2
## 3 0.7440930 0.000000000 Fold3
# Keep only the Accuracy column (the first column, as printed above) as a
# one-column data frame for the fold-wise comparison later on.
ad_tda_kde_5.60.5_n3_nb_fit_re <-
  Adult_TDA_KDE_5.60.5_n3_NbFit0$resample["Accuracy"]
summary(Adult_TDA_KDE_5.60.5_n3_NbFit0)
## Length Class Mode
## apriori 2 table numeric
## tables 108 -none- list
## levels 2 -none- character
## call 6 -none- call
## x 108 data.frame list
## usekernel 1 -none- logical
## varnames 108 -none- character
## xNames 108 -none- character
## problemType 1 -none- character
## tuneValue 3 data.frame list
## obsLevels 2 -none- character
## param 0 -none- list
# Score the test data with the node-3 NB model. `pred0` is overwritten by each
# node's prediction step.
pred0 <- predict(
  Adult_TDA_KDE_5.60.5_n3_NbFit0,
  newdata = adult.one_hot_df4Test
)
# Cross-tabulate predictions against the test labels to assess performance.
ad_tda_kde_5.60.5_n3_nb_cf0 <- confusionMatrix(
  data = pred0,
  reference = as.factor(adult.one_hot_df4Test$adult_df1)
)
ad_tda_kde_5.60.5_n3_nb_cf0
## Confusion Matrix and Statistics
##
## Reference
## Prediction <=50K >50K
## <=50K 7416 2205
## >50K 0 147
##
## Accuracy : 0.7743
## 95% CI : (0.7658, 0.7825)
## No Information Rate : 0.7592
## P-Value [Acc > NIR] : 0.0002414
##
## Kappa : 0.0919
##
## Mcnemar's Test P-Value : < 2.2e-16
##
## Sensitivity : 1.0000
## Specificity : 0.0625
## Pos Pred Value : 0.7708
## Neg Pred Value : 1.0000
## Prevalence : 0.7592
## Detection Rate : 0.7592
## Detection Prevalence : 0.9850
## Balanced Accuracy : 0.5312
##
## 'Positive' Class : <=50K
##
ad_tda_kde_5.60.5_n3_nb_cf0
## Confusion Matrix and Statistics
##
## Reference
## Prediction <=50K >50K
## <=50K 7416 2205
## >50K 0 147
##
## Accuracy : 0.7743
## 95% CI : (0.7658, 0.7825)
## No Information Rate : 0.7592
## P-Value [Acc > NIR] : 0.0002414
##
## Kappa : 0.0919
##
## Mcnemar's Test P-Value : < 2.2e-16
##
## Sensitivity : 1.0000
## Specificity : 0.0625
## Pos Pred Value : 0.7708
## Neg Pred Value : 1.0000
## Prevalence : 0.7592
## Detection Rate : 0.7592
## Detection Prevalence : 0.9850
## Balanced Accuracy : 0.5312
##
## 'Positive' Class : <=50K
##
# Overall statistics of the node-3 confusion matrix.
ad_tda_kde_5.60.5_n3_nb_cf0$overall
## Accuracy Kappa AccuracyLower AccuracyUpper AccuracyNull
## 0.7742628993 0.0919232485 0.7658399046 0.7825220598 0.7592137592
## AccuracyPValue McnemarPValue
## 0.0002413576 0.0000000000
# Accuracy is the first element of $overall; select it by name.
ad_tda_kde_5.60.5_n3_nb_cf0_ov_acc <-
  ad_tda_kde_5.60.5_n3_nb_cf0$overall["Accuracy"]
# Per-class statistics.
ad_tda_kde_5.60.5_n3_nb_cf0$byClass
## Sensitivity Specificity Pos Pred Value
## 1.0000000 0.0625000 0.7708138
## Neg Pred Value Precision Recall
## 1.0000000 0.7708138 1.0000000
## F1 Prevalence Detection Rate
## 0.8705758 0.7592138 0.7592138
## Detection Prevalence Balanced Accuracy
## 0.9849509 0.5312500
# Precision, Recall and F1 sit at positions 5:7 of byClass; select by name.
ad_tda_kde_5.60.5_n3_nb_cf0_pre_rec_f1 <-
  ad_tda_kde_5.60.5_n3_nb_cf0$byClass[c("Precision", "Recall", "F1")]
###### Conduct initial Bayesian tests of non-tda-assisted NB vs. tda-assisted NB classifiers
### 3-fold diff
# Per-fold accuracy gap: ad_nb_fit_re minus the node-3 fold accuracies, so a
# positive entry means ad_nb_fit_re scored higher in that fold.
# NOTE(review): ad_nb_fit_re is defined outside this section -- presumably the
# non-TDA NB resample accuracies; confirm.
diff_tda_kde_5.60.5_nb_n3_3_fold <-
  ad_nb_fit_re - ad_tda_kde_5.60.5_n3_nb_fit_re
diff_tda_kde_5.60.5_nb_n3_3_fold
## Accuracy
## 1 0.01415000
## 2 0.02945310
## 3 0.01781804
## Bayesian Tests 3-fold diff
# Bayesian Sign Test (-0.01 / 0.01 are presumably the ROPE bounds)
bst_tda_kde_5.60.5_nb.n3_3_fold <- BayesianSignTest(
  as.matrix(diff_tda_kde_5.60.5_nb_n3_3_fold), -0.01, 0.01
)
bst_tda_kde_5.60.5_nb.n3_3_fold
## $probLeft
## [1] 0
##
## $probRope
## [1] 0.25
##
## $probRight
## [1] 0.75
# Odds Left Bayesian Sign Test: probLeft / probRight
bst_tda_kde_5.60.5_nb.n3_3_fold_odds.left <-
  bst_tda_kde_5.60.5_nb.n3_3_fold[["probLeft"]] /
  bst_tda_kde_5.60.5_nb.n3_3_fold[["probRight"]]
bst_tda_kde_5.60.5_nb.n3_3_fold_odds.left
## [1] 0
# Bayesian Signed Rank Test
bsr_tda_kde_5.60.5_nb.n3_3_fold <- BayesianSignedRank(
  as.matrix(diff_tda_kde_5.60.5_nb_n3_3_fold), -0.01, 0.01
)
bsr_tda_kde_5.60.5_nb.n3_3_fold
## $winLeft
## [1] 0
##
## $winRope
## [1] 0.0875
##
## $winRight
## [1] 0.9125
# Bayesian Correlated Test
# NOTE(review): 0.1 is the second positional argument -- presumably the
# correlation parameter; confirm against correlatedBayesianTtest's definition.
bct_tda_kde_5.60.5_nb.n3_3_fold <- correlatedBayesianTtest(
  as.matrix(diff_tda_kde_5.60.5_nb_n3_3_fold), 0.1, -0.01, 0.01
)
bct_tda_kde_5.60.5_nb.n3_3_fold
## $left
## [1] 0.01460946
##
## $rope
## [1] 0.07947101
##
## $right
## [1] 0.9059195
# Rope Plot
plot(rope(diff_tda_kde_5.60.5_nb_n3_3_fold, c(-0.01, 0.01)))

#BayesFactor
#bf_tda_kde_5.60.5_nb.n3_3_fold = ttestBF(x = as.matrix(diff_tda_kde_5.60.5_nb_n3_3_fold))
#bf_tda_kde_5.60.5_nb.n3_3_fold
#t_test
# One-sample t-test of the three fold differences against a mean of zero.
t.test(as.matrix(diff_tda_kde_5.60.5_nb_n3_3_fold))
##
## One Sample t-test
##
## data: as.matrix(diff_tda_kde_5.60.5_nb_n3_3_fold)
## t = 4.4384, df = 2, p-value = 0.0472
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
## 0.000626148 0.040321281
## sample estimates:
## mean of x
## 0.02047371
### Test set diff
# Held-out accuracy gap: nb_cf_ov_acc minus the node-3 test accuracy.
# NOTE(review): only one difference is available here; the correlated test
# below prints NA for all three probabilities.
diff_tda_kde_5.60.5_nb.n3_test <-
  nb_cf_ov_acc - ad_tda_kde_5.60.5_n3_nb_cf0_ov_acc
diff_tda_kde_5.60.5_nb.n3_test
## Accuracy
## -0.001433251
## Bayesian Tests Test set diff
# Bayesian Sign Test
bst_tda_kde_5.60.5_nb.n3_test <- BayesianSignTest(
  as.matrix(diff_tda_kde_5.60.5_nb.n3_test), -0.01, 0.01
)
bst_tda_kde_5.60.5_nb.n3_test
## $probLeft
## [1] 0
##
## $probRope
## [1] 1
##
## $probRight
## [1] 0
# Odds Left Bayesian Sign Test: probLeft / probRight (0 / 0 prints NaN)
bst_tda_kde_5.60.5_nb.n3_test_odds.left <-
  bst_tda_kde_5.60.5_nb.n3_test[["probLeft"]] /
  bst_tda_kde_5.60.5_nb.n3_test[["probRight"]]
bst_tda_kde_5.60.5_nb.n3_test_odds.left
## [1] NaN
# Bayesian Signed Rank Test
bsr_tda_kde_5.60.5_nb.n3_test <- BayesianSignedRank(
  as.matrix(diff_tda_kde_5.60.5_nb.n3_test), -0.01, 0.01
)
bsr_tda_kde_5.60.5_nb.n3_test
## $winLeft
## [1] 0
##
## $winRope
## [1] 1
##
## $winRight
## [1] 0
# Bayesian Correlated Test
bct_tda_kde_5.60.5_nb.n3_test <- correlatedBayesianTtest(
  as.matrix(diff_tda_kde_5.60.5_nb.n3_test), 0.1, -0.01, 0.01
)
bct_tda_kde_5.60.5_nb.n3_test
## $left
## [1] NA
##
## $rope
## [1] NA
##
## $right
## [1] NA
# Rope Plot
#plot(rope(diff_tda_kde_5.60.5_nb.n3_test)))
#BayesFactor
#bf_tda_kde_5.60.5_nb.n3_test = ttestBF(x = as.matrix(diff_tda_kde_5.60.5_nb.n3_test)) #bf_tda_kde_5.60.5_nb.n3_test
#t_test
#t.test(as.matrix(diff_tda_kde_5.60.5_nb.n3_test))
##Node4
# Fit naive Bayes on the node-4 data, resampled as configured in fitControl and
# tuned on accuracy. The warnings printed after this call show the
# usekernel = FALSE candidate failing in every fold (zero-variance columns),
# so only the usekernel = TRUE candidate gets a resampled score.
Adult_TDA_KDE_5.60.5_n4_NbFit0 <- train(
  as.factor(adult_df1) ~ .,
  data = tda.m_adult_5.60.5.n4.vec,
  method = "nb",
  trControl = fitControl,
  metric = "Accuracy"
)
## Warning: model fit failed for Fold1: usekernel=FALSE, fL=0, adjust=1 Error in NaiveBayes.default(x, y, usekernel = FALSE, fL = param$fL, ...) :
## Zero variances for at least one class in variables: V2.Never.worked, V2.Without.pay, V4.Preschool, V7.Armed.Forces, V14.Dominican.Republic, V14.Holand.Netherlands, V14.Honduras, V14.Hungary, V14.Iran, V14.Jamaica, V14.Laos, V14.Nicaragua, V14.Outlying.US.Guam.USVI.etc., V14.Peru, V14.Thailand, V14.Trinadad.Tobago, V14.Yugoslavia
## Warning: model fit failed for Fold2: usekernel=FALSE, fL=0, adjust=1 Error in NaiveBayes.default(x, y, usekernel = FALSE, fL = param$fL, ...) :
## Zero variances for at least one class in variables: V2.Never.worked, V2.Without.pay, V4.Preschool, V7.Armed.Forces, V14.Cambodia, V14.Columbia, V14.Ecuador, V14.Holand.Netherlands, V14.Honduras, V14.Hong, V14.Iran, V14.Outlying.US.Guam.USVI.etc., V14.Peru, V14.Portugal, V14.Trinadad.Tobago
## Warning: model fit failed for Fold3: usekernel=FALSE, fL=0, adjust=1 Error in NaiveBayes.default(x, y, usekernel = FALSE, fL = param$fL, ...) :
## Zero variances for at least one class in variables: V2.Never.worked, V2.Without.pay, V4.1st.4th, V4.Preschool, V7.Armed.Forces, V7.Priv.house.serv, V14.Holand.Netherlands, V14.Honduras, V14.Iran, V14.Ireland, V14.Outlying.US.Guam.USVI.etc., V14.Peru, V14.Scotland, V14.Trinadad.Tobago
## Warning in nominalTrainWorkflow(x = x, y = y, wts = weights, info = trainInfo,
## : There were missing values in resampled performance measures.
## Warning in train.default(x, y, weights = w, ...): missing values found in
## aggregated results
Adult_TDA_KDE_5.60.5_n4_NbFit0
## Naive Bayes
##
## 19829 samples
## 108 predictor
## 2 classes: ' <=50K', ' >50K'
##
## No pre-processing
## Resampling: Cross-Validated (3 fold)
## Summary of sample sizes: 13219, 13220, 13219
## Resampling results across tuning parameters:
##
## usekernel Accuracy Kappa
## FALSE NaN NaN
## TRUE 0.9351455 0
##
## Tuning parameter 'fL' was held constant at a value of 0
## Tuning
## parameter 'adjust' was held constant at a value of 1
## Accuracy was used to select the optimal model using the largest value.
## The final values used for the model were fL = 0, usekernel = TRUE and adjust
## = 1.
# Per-fold performance of the selected model.
Adult_TDA_KDE_5.60.5_n4_NbFit0$resample
## Accuracy Kappa Resample
## 1 0.9350983 0 Fold1
## 2 0.9352398 0 Fold2
## 3 0.9350983 0 Fold3
# Keep only the Accuracy column (the first column, as printed above) as a
# one-column data frame for the fold-wise comparison later on.
ad_tda_kde_5.60.5_n4_nb_fit_re <-
  Adult_TDA_KDE_5.60.5_n4_NbFit0$resample["Accuracy"]
summary(Adult_TDA_KDE_5.60.5_n4_NbFit0)
## Length Class Mode
## apriori 2 table numeric
## tables 108 -none- list
## levels 2 -none- character
## call 6 -none- call
## x 108 data.frame list
## usekernel 1 -none- logical
## varnames 108 -none- character
## xNames 108 -none- character
## problemType 1 -none- character
## tuneValue 3 data.frame list
## obsLevels 2 -none- character
## param 0 -none- list
# Score the test data with the node-4 NB model. `pred0` is overwritten by each
# node's prediction step.
pred0 <- predict(
  Adult_TDA_KDE_5.60.5_n4_NbFit0,
  newdata = adult.one_hot_df4Test
)
# Cross-tabulate predictions against the test labels to assess performance.
ad_tda_kde_5.60.5_n4_nb_cf0 <- confusionMatrix(
  data = pred0,
  reference = as.factor(adult.one_hot_df4Test$adult_df1)
)
ad_tda_kde_5.60.5_n4_nb_cf0
## Confusion Matrix and Statistics
##
## Reference
## Prediction <=50K >50K
## <=50K 7416 2352
## >50K 0 0
##
## Accuracy : 0.7592
## 95% CI : (0.7506, 0.7677)
## No Information Rate : 0.7592
## P-Value [Acc > NIR] : 0.5055
##
## Kappa : 0
##
## Mcnemar's Test P-Value : <2e-16
##
## Sensitivity : 1.0000
## Specificity : 0.0000
## Pos Pred Value : 0.7592
## Neg Pred Value : NaN
## Prevalence : 0.7592
## Detection Rate : 0.7592
## Detection Prevalence : 1.0000
## Balanced Accuracy : 0.5000
##
## 'Positive' Class : <=50K
##
ad_tda_kde_5.60.5_n4_nb_cf0
## Confusion Matrix and Statistics
##
## Reference
## Prediction <=50K >50K
## <=50K 7416 2352
## >50K 0 0
##
## Accuracy : 0.7592
## 95% CI : (0.7506, 0.7677)
## No Information Rate : 0.7592
## P-Value [Acc > NIR] : 0.5055
##
## Kappa : 0
##
## Mcnemar's Test P-Value : <2e-16
##
## Sensitivity : 1.0000
## Specificity : 0.0000
## Pos Pred Value : 0.7592
## Neg Pred Value : NaN
## Prevalence : 0.7592
## Detection Rate : 0.7592
## Detection Prevalence : 1.0000
## Balanced Accuracy : 0.5000
##
## 'Positive' Class : <=50K
##
# Overall statistics of the node-4 confusion matrix.
ad_tda_kde_5.60.5_n4_nb_cf0$overall
## Accuracy Kappa AccuracyLower AccuracyUpper AccuracyNull
## 0.7592138 0.0000000 0.7506071 0.7676657 0.7592138
## AccuracyPValue McnemarPValue
## 0.5055358 0.0000000
# Accuracy is the first element of $overall; select it by name.
ad_tda_kde_5.60.5_n4_nb_cf0_ov_acc <-
  ad_tda_kde_5.60.5_n4_nb_cf0$overall["Accuracy"]
# Per-class statistics.
ad_tda_kde_5.60.5_n4_nb_cf0$byClass
## Sensitivity Specificity Pos Pred Value
## 1.0000000 0.0000000 0.7592138
## Neg Pred Value Precision Recall
## NaN 0.7592138 1.0000000
## F1 Prevalence Detection Rate
## 0.8631285 0.7592138 0.7592138
## Detection Prevalence Balanced Accuracy
## 1.0000000 0.5000000
# Precision, Recall and F1 sit at positions 5:7 of byClass; select by name.
ad_tda_kde_5.60.5_n4_nb_cf0_pre_rec_f1 <-
  ad_tda_kde_5.60.5_n4_nb_cf0$byClass[c("Precision", "Recall", "F1")]
###### Conduct initial Bayesian tests of non-tda-assisted NB vs. tda-assisted NB classifiers
### 3-fold diff
# Per-fold accuracy gap: ad_nb_fit_re minus the node-4 fold accuracies, so a
# negative entry means the node-4 model scored higher in that fold.
# NOTE(review): ad_nb_fit_re is defined outside this section -- presumably the
# non-TDA NB resample accuracies; confirm.
diff_tda_kde_5.60.5_nb_n4_3_fold <-
  ad_nb_fit_re - ad_tda_kde_5.60.5_n4_nb_fit_re
diff_tda_kde_5.60.5_nb_n4_3_fold
## Accuracy
## 1 -0.1759512
## 2 -0.1616450
## 3 -0.1731873
## Bayesian Tests 3-fold diff
# Bayesian Sign Test (-0.01 / 0.01 are presumably the ROPE bounds)
bst_tda_kde_5.60.5_nb.n4_3_fold <- BayesianSignTest(
  as.matrix(diff_tda_kde_5.60.5_nb_n4_3_fold), -0.01, 0.01
)
bst_tda_kde_5.60.5_nb.n4_3_fold
## $probLeft
## [1] 0.75
##
## $probRope
## [1] 0.25
##
## $probRight
## [1] 0
# Odds Left Bayesian Sign Test: probLeft / probRight (x / 0 prints Inf)
bst_tda_kde_5.60.5_nb.n4_3_fold_odds.left <-
  bst_tda_kde_5.60.5_nb.n4_3_fold[["probLeft"]] /
  bst_tda_kde_5.60.5_nb.n4_3_fold[["probRight"]]
bst_tda_kde_5.60.5_nb.n4_3_fold_odds.left
## [1] Inf
# Bayesian Signed Rank Test
bsr_tda_kde_5.60.5_nb.n4_3_fold <- BayesianSignedRank(
  as.matrix(diff_tda_kde_5.60.5_nb_n4_3_fold), -0.01, 0.01
)
bsr_tda_kde_5.60.5_nb.n4_3_fold
## $winLeft
## [1] 0.9909
##
## $winRope
## [1] 0.0091
##
## $winRight
## [1] 0
# Bayesian Correlated Test
# NOTE(review): 0.1 is the second positional argument -- presumably the
# correlation parameter; confirm against correlatedBayesianTtest's definition.
bct_tda_kde_5.60.5_nb.n4_3_fold <- correlatedBayesianTtest(
  as.matrix(diff_tda_kde_5.60.5_nb_n4_3_fold), 0.1, -0.01, 0.01
)
bct_tda_kde_5.60.5_nb.n4_3_fold
## $left
## [1] 0.9995025
##
## $rope
## [1] 0.0001041544
##
## $right
## [1] 0.0003933754
# Rope Plot
plot(rope(diff_tda_kde_5.60.5_nb_n4_3_fold, c(-0.01, 0.01)))

#BayesFactor
#bf_tda_kde_5.60.5_nb.n4_3_fold = ttestBF(x = as.matrix(diff_tda_kde_5.60.5_nb_n4_3_fold))
#bf_tda_kde_5.60.5_nb.n4_3_fold
#t_test
# One-sample t-test of the three fold differences against a mean of zero.
t.test(as.matrix(diff_tda_kde_5.60.5_nb_n4_3_fold))
##
## One Sample t-test
##
## data: as.matrix(diff_tda_kde_5.60.5_nb_n4_3_fold)
## t = -38.86, df = 2, p-value = 0.0006615
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
## -0.1891126 -0.1514097
## sample estimates:
## mean of x
## -0.1702612
### Test set diff
# Held-out accuracy gap: nb_cf_ov_acc minus the node-4 test accuracy.
# NOTE(review): only one difference is available here; the correlated test
# below prints NA for all three probabilities.
diff_tda_kde_5.60.5_nb.n4_test <-
  nb_cf_ov_acc - ad_tda_kde_5.60.5_n4_nb_cf0_ov_acc
diff_tda_kde_5.60.5_nb.n4_test
## Accuracy
## 0.01361589
## Bayesian Tests Test set diff
# Bayesian Sign Test
bst_tda_kde_5.60.5_nb.n4_test <- BayesianSignTest(
  as.matrix(diff_tda_kde_5.60.5_nb.n4_test), -0.01, 0.01
)
bst_tda_kde_5.60.5_nb.n4_test
## $probLeft
## [1] 0
##
## $probRope
## [1] 0.5
##
## $probRight
## [1] 0.5
# Odds Left Bayesian Sign Test: probLeft / probRight
bst_tda_kde_5.60.5_nb.n4_test_odds.left <-
  bst_tda_kde_5.60.5_nb.n4_test[["probLeft"]] /
  bst_tda_kde_5.60.5_nb.n4_test[["probRight"]]
bst_tda_kde_5.60.5_nb.n4_test_odds.left
## [1] 0
# Bayesian Signed Rank Test
bsr_tda_kde_5.60.5_nb.n4_test <- BayesianSignedRank(
  as.matrix(diff_tda_kde_5.60.5_nb.n4_test), -0.01, 0.01
)
bsr_tda_kde_5.60.5_nb.n4_test
## $winLeft
## [1] 0
##
## $winRope
## [1] 0.4578667
##
## $winRight
## [1] 0.5421333
# Bayesian Correlated Test
bct_tda_kde_5.60.5_nb.n4_test <- correlatedBayesianTtest(
  as.matrix(diff_tda_kde_5.60.5_nb.n4_test), 0.1, -0.01, 0.01
)
bct_tda_kde_5.60.5_nb.n4_test
## $left
## [1] NA
##
## $rope
## [1] NA
##
## $right
## [1] NA
# Rope Plot
#plot(rope(diff_tda_kde_5.60.5_nb.n4_test)))
#BayesFactor
#bf_tda_kde_5.60.5_nb.n4_test = ttestBF(x = as.matrix(diff_tda_kde_5.60.5_nb.n4_test)) #bf_tda_kde_5.60.5_nb.n4_test
#t_test
#t.test(as.matrix(diff_tda_kde_5.60.5_nb.n4_test))
##Node5
# Fit naive Bayes on the node-5 data, resampled as configured in fitControl and
# tuned on accuracy. The warnings printed after this call show the
# usekernel = FALSE candidate failing in every fold (zero-variance columns),
# so only the usekernel = TRUE candidate gets a resampled score.
Adult_TDA_KDE_5.60.5_n5_NbFit0 <- train(
  as.factor(adult_df1) ~ .,
  data = tda.m_adult_5.60.5.n5.vec,
  method = "nb",
  trControl = fitControl,
  metric = "Accuracy"
)
## Warning: model fit failed for Fold1: usekernel=FALSE, fL=0, adjust=1 Error in NaiveBayes.default(x, y, usekernel = FALSE, fL = param$fL, ...) :
## Zero variances for at least one class in variables: V2.Never.worked, V2.Self.emp.inc, V2.Self.emp.not.inc, V2.Without.pay, V4.10th, V4.1st.4th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Masters, V4.Preschool, V4.Prof.school, V7.Armed.Forces, V7.Farming.fishing, V7.Transport.moving, V8.Husband, V14.Cambodia, V14.Canada, V14.China, V14.Columbia, V14.Cuba, V14.Dominican.Republic, V14.Ecuador, V14.El.Salvador, V14.France, V14.Germany, V14.Greece, V14.Haiti, V14.Holand.Netherlands, V14.Honduras, V14.Hong, V14.Hungary, V14.Iran, V14.Italy, V14.Nicaragua, V14.Outlying.US.Guam.USVI.etc., V14.Peru, V14.Poland, V14.Puerto.Rico, V14.Scotland, V14.South, V14.Taiwan, V14.Thailand, V14.Trinadad.Tobago, V14.Yugoslavia
## Warning: model fit failed for Fold2: usekernel=FALSE, fL=0, adjust=1 Error in NaiveBayes.default(x, y, usekernel = FALSE, fL = param$fL, ...) :
## Zero variances for at least one class in variables: V2.Never.worked, V2.Self.emp.inc, V2.Self.emp.not.inc, V2.Without.pay, V4.10th, V4.1st.4th, V4.5th.6th, V4.Doctorate, V4.Preschool, V4.Prof.school, V7.Armed.Forces, V7.Farming.fishing, V7.Handlers.cleaners, V7.Transport.moving, V8.Husband, V14.Cambodia, V14.Canada, V14.China, V14.Columbia, V14.Cuba, V14.Dominican.Republic, V14.Ecuador, V14.El.Salvador, V14.England, V14.France, V14.Greece, V14.Guatemala, V14.Haiti, V14.Holand.Netherlands, V14.Honduras, V14.Hong, V14.Hungary, V14.Iran, V14.Ireland, V14.Italy, V14.Jamaica, V14.Laos, V14.Mexico, V14.Nicaragua, V14.Outlying.US.Guam.USVI.etc., V14.Peru, V14.Poland, V14.Portugal, V14.Puerto.Rico, V14.Scotland, V14.South, V14.Taiwan, V14.Thailand, V14.Trinadad.Tobago, V14.Yugoslavia
## Warning: model fit failed for Fold3: usekernel=FALSE, fL=0, adjust=1 Error in NaiveBayes.default(x, y, usekernel = FALSE, fL = param$fL, ...) :
## Zero variances for at least one class in variables: V2.Never.worked, V2.Self.emp.inc, V2.Self.emp.not.inc, V2.Without.pay, V4.10th, V4.1st.4th, V4.Doctorate, V4.Preschool, V4.Prof.school, V7.Armed.Forces, V7.Farming.fishing, V7.Priv.house.serv, V7.Transport.moving, V8.Husband, V14.., V14.Cambodia, V14.Canada, V14.China, V14.Columbia, V14.Cuba, V14.Dominican.Republic, V14.Ecuador, V14.El.Salvador, V14.France, V14.Greece, V14.Haiti, V14.Holand.Netherlands, V14.Honduras, V14.Hong, V14.Hungary, V14.India, V14.Iran, V14.Italy, V14.Nicaragua, V14.Outlying.US.Guam.USVI.etc., V14.Peru, V14.Poland, V14.Puerto.Rico, V14.Scotland, V14.South, V14.Taiwan, V14.Thailand, V14.Trinadad.Tobago, V14.Yugoslavia
## Warning in nominalTrainWorkflow(x = x, y = y, wts = weights, info = trainInfo,
## : There were missing values in resampled performance measures.
## Warning in train.default(x, y, weights = w, ...): missing values found in
## aggregated results
Adult_TDA_KDE_5.60.5_n5_NbFit0
## Naive Bayes
##
## 16508 samples
## 108 predictor
## 2 classes: ' <=50K', ' >50K'
##
## No pre-processing
## Resampling: Cross-Validated (3 fold)
## Summary of sample sizes: 11005, 11006, 11005
## Resampling results across tuning parameters:
##
## usekernel Accuracy Kappa
## FALSE NaN NaN
## TRUE 0.992125 0
##
## Tuning parameter 'fL' was held constant at a value of 0
## Tuning
## parameter 'adjust' was held constant at a value of 1
## Accuracy was used to select the optimal model using the largest value.
## The final values used for the model were fL = 0, usekernel = TRUE and adjust
## = 1.
# Per-fold performance of the selected model.
Adult_TDA_KDE_5.60.5_n5_NbFit0$resample
## Accuracy Kappa Resample
## 1 0.9921861 0 Fold1
## 2 0.9921847 0 Fold2
## 3 0.9920044 0 Fold3
# Keep only the Accuracy column (the first column, as printed above) as a
# one-column data frame for the fold-wise comparison later on.
ad_tda_kde_5.60.5_n5_nb_fit_re <-
  Adult_TDA_KDE_5.60.5_n5_NbFit0$resample["Accuracy"]
summary(Adult_TDA_KDE_5.60.5_n5_NbFit0)
## Length Class Mode
## apriori 2 table numeric
## tables 108 -none- list
## levels 2 -none- character
## call 6 -none- call
## x 108 data.frame list
## usekernel 1 -none- logical
## varnames 108 -none- character
## xNames 108 -none- character
## problemType 1 -none- character
## tuneValue 3 data.frame list
## obsLevels 2 -none- character
## param 0 -none- list
# Score the test data with the node-5 NB model. `pred0` is overwritten by each
# node's prediction step.
pred0 <- predict(
  Adult_TDA_KDE_5.60.5_n5_NbFit0,
  newdata = adult.one_hot_df4Test
)
# Cross-tabulate predictions against the test labels to assess performance.
ad_tda_kde_5.60.5_n5_nb_cf0 <- confusionMatrix(
  data = pred0,
  reference = as.factor(adult.one_hot_df4Test$adult_df1)
)
ad_tda_kde_5.60.5_n5_nb_cf0
## Confusion Matrix and Statistics
##
## Reference
## Prediction <=50K >50K
## <=50K 7416 2352
## >50K 0 0
##
## Accuracy : 0.7592
## 95% CI : (0.7506, 0.7677)
## No Information Rate : 0.7592
## P-Value [Acc > NIR] : 0.5055
##
## Kappa : 0
##
## Mcnemar's Test P-Value : <2e-16
##
## Sensitivity : 1.0000
## Specificity : 0.0000
## Pos Pred Value : 0.7592
## Neg Pred Value : NaN
## Prevalence : 0.7592
## Detection Rate : 0.7592
## Detection Prevalence : 1.0000
## Balanced Accuracy : 0.5000
##
## 'Positive' Class : <=50K
##
ad_tda_kde_5.60.5_n5_nb_cf0
## Confusion Matrix and Statistics
##
## Reference
## Prediction <=50K >50K
## <=50K 7416 2352
## >50K 0 0
##
## Accuracy : 0.7592
## 95% CI : (0.7506, 0.7677)
## No Information Rate : 0.7592
## P-Value [Acc > NIR] : 0.5055
##
## Kappa : 0
##
## Mcnemar's Test P-Value : <2e-16
##
## Sensitivity : 1.0000
## Specificity : 0.0000
## Pos Pred Value : 0.7592
## Neg Pred Value : NaN
## Prevalence : 0.7592
## Detection Rate : 0.7592
## Detection Prevalence : 1.0000
## Balanced Accuracy : 0.5000
##
## 'Positive' Class : <=50K
##
# Overall statistics of the node-5 confusion matrix.
ad_tda_kde_5.60.5_n5_nb_cf0$overall
## Accuracy Kappa AccuracyLower AccuracyUpper AccuracyNull
## 0.7592138 0.0000000 0.7506071 0.7676657 0.7592138
## AccuracyPValue McnemarPValue
## 0.5055358 0.0000000
# Accuracy is the first element of $overall; select it by name.
ad_tda_kde_5.60.5_n5_nb_cf0_ov_acc <-
  ad_tda_kde_5.60.5_n5_nb_cf0$overall["Accuracy"]
# Per-class statistics.
ad_tda_kde_5.60.5_n5_nb_cf0$byClass
## Sensitivity Specificity Pos Pred Value
## 1.0000000 0.0000000 0.7592138
## Neg Pred Value Precision Recall
## NaN 0.7592138 1.0000000
## F1 Prevalence Detection Rate
## 0.8631285 0.7592138 0.7592138
## Detection Prevalence Balanced Accuracy
## 1.0000000 0.5000000
# Precision, Recall and F1 sit at positions 5:7 of byClass; select by name.
ad_tda_kde_5.60.5_n5_nb_cf0_pre_rec_f1 <-
  ad_tda_kde_5.60.5_n5_nb_cf0$byClass[c("Precision", "Recall", "F1")]
###### Conduct initial Bayesian tests of non-tda-assisted NB vs. tda-assisted NB classifiers
### 3-fold diff
# Per-fold accuracy gap: ad_nb_fit_re minus the node-5 fold accuracies, so a
# negative entry means the node-5 model scored higher in that fold.
# NOTE(review): ad_nb_fit_re is defined outside this section -- presumably the
# non-TDA NB resample accuracies; confirm.
diff_tda_kde_5.60.5_nb_n5_3_fold <-
  ad_nb_fit_re - ad_tda_kde_5.60.5_n5_nb_fit_re
diff_tda_kde_5.60.5_nb_n5_3_fold
## Accuracy
## 1 -0.2330389
## 2 -0.2185898
## 3 -0.2300933
## Bayesian Tests 3-fold diff
# Bayesian Sign Test (-0.01 / 0.01 are presumably the ROPE bounds)
bst_tda_kde_5.60.5_nb.n5_3_fold <- BayesianSignTest(
  as.matrix(diff_tda_kde_5.60.5_nb_n5_3_fold), -0.01, 0.01
)
bst_tda_kde_5.60.5_nb.n5_3_fold
## $probLeft
## [1] 0.75
##
## $probRope
## [1] 0.25
##
## $probRight
## [1] 0
# Odds Left Bayesian Sign Test: probLeft / probRight (x / 0 prints Inf)
bst_tda_kde_5.60.5_nb.n5_3_fold_odds.left <-
  bst_tda_kde_5.60.5_nb.n5_3_fold[["probLeft"]] /
  bst_tda_kde_5.60.5_nb.n5_3_fold[["probRight"]]
bst_tda_kde_5.60.5_nb.n5_3_fold_odds.left
## [1] Inf
# Bayesian Signed Rank Test
bsr_tda_kde_5.60.5_nb.n5_3_fold <- BayesianSignedRank(
  as.matrix(diff_tda_kde_5.60.5_nb_n5_3_fold), -0.01, 0.01
)
bsr_tda_kde_5.60.5_nb.n5_3_fold
## $winLeft
## [1] 0.9912
##
## $winRope
## [1] 0.0088
##
## $winRight
## [1] 0
# Bayesian Correlated Test
# NOTE(review): 0.1 is the second positional argument -- presumably the
# correlation parameter; confirm against correlatedBayesianTtest's definition.
bct_tda_kde_5.60.5_nb.n5_3_fold <- correlatedBayesianTtest(
  as.matrix(diff_tda_kde_5.60.5_nb_n5_3_fold), 0.1, -0.01, 0.01
)
bct_tda_kde_5.60.5_nb.n5_3_fold
## $left
## [1] 0.9997257
##
## $rope
## [1] 4.426543e-05
##
## $right
## [1] 0.0002300162
# Rope Plot
plot(rope(diff_tda_kde_5.60.5_nb_n5_3_fold, c(-0.01, 0.01)))

#BayesFactor
#bf_tda_kde_5.60.5_nb.n5_3_fold = ttestBF(x = as.matrix(diff_tda_kde_5.60.5_nb_n5_3_fold))
#bf_tda_kde_5.60.5_nb.n5_3_fold
#t_test
# One-sample t-test of the three fold differences against a mean of zero.
t.test(as.matrix(diff_tda_kde_5.60.5_nb_n5_3_fold))
##
## One Sample t-test
##
## data: as.matrix(diff_tda_kde_5.60.5_nb_n5_3_fold)
## t = -51.549, df = 2, p-value = 0.0003761
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
## -0.2462078 -0.2082736
## sample estimates:
## mean of x
## -0.2272407
### Test set diff
# Held-out accuracy gap: nb_cf_ov_acc minus the node-5 test accuracy.
# NOTE(review): only one difference is available here; the correlated test
# below prints NA for all three probabilities.
diff_tda_kde_5.60.5_nb.n5_test <-
  nb_cf_ov_acc - ad_tda_kde_5.60.5_n5_nb_cf0_ov_acc
diff_tda_kde_5.60.5_nb.n5_test
## Accuracy
## 0.01361589
## Bayesian Tests Test set diff
# Bayesian Sign Test
bst_tda_kde_5.60.5_nb.n5_test <- BayesianSignTest(
  as.matrix(diff_tda_kde_5.60.5_nb.n5_test), -0.01, 0.01
)
bst_tda_kde_5.60.5_nb.n5_test
## $probLeft
## [1] 0
##
## $probRope
## [1] 0.5
##
## $probRight
## [1] 0.5
# Odds Left Bayesian Sign Test: probLeft / probRight
bst_tda_kde_5.60.5_nb.n5_test_odds.left <-
  bst_tda_kde_5.60.5_nb.n5_test[["probLeft"]] /
  bst_tda_kde_5.60.5_nb.n5_test[["probRight"]]
bst_tda_kde_5.60.5_nb.n5_test_odds.left
## [1] 0
# Bayesian Signed Rank Test
bsr_tda_kde_5.60.5_nb.n5_test <- BayesianSignedRank(
  as.matrix(diff_tda_kde_5.60.5_nb.n5_test), -0.01, 0.01
)
bsr_tda_kde_5.60.5_nb.n5_test
## $winLeft
## [1] 0
##
## $winRope
## [1] 0.4567333
##
## $winRight
## [1] 0.5432667
# Bayesian Correlated Test
bct_tda_kde_5.60.5_nb.n5_test <- correlatedBayesianTtest(
  as.matrix(diff_tda_kde_5.60.5_nb.n5_test), 0.1, -0.01, 0.01
)
bct_tda_kde_5.60.5_nb.n5_test
## $left
## [1] NA
##
## $rope
## [1] NA
##
## $right
## [1] NA
# Rope Plot
#plot(rope(diff_tda_kde_5.60.5_nb.n5_test)))
#BayesFactor
#bf_tda_kde_5.60.5_nb.n5_test = ttestBF(x = as.matrix(diff_tda_kde_5.60.5_nb.n5_test)) #bf_tda_kde_5.60.5_nb.n5_test
#t_test
#t.test(as.matrix(diff_tda_kde_5.60.5_nb.n5_test))